* [PATCH 1/2] KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
@ 2017-06-27  6:07 ` Paul Mackerras
  0 siblings, 0 replies; 20+ messages in thread
From: Paul Mackerras @ 2017-06-27  6:07 UTC (permalink / raw)
  To: kvm, kvm-ppc; +Cc: David Gibson

Since commit b009031f74da ("KVM: PPC: Book3S HV: Take out virtual
core piggybacking code", 2016-09-15), we only have at most one
vcore per subcore.  Previously, the fact that there might be more
than one vcore per subcore meant that we had the notion of a
"master vcore", which was the vcore that controlled thread 0 of
the subcore.  We also needed a list per subcore in the core_info
struct to record which vcores belonged to each subcore.  Now that
there can only be one vcore in the subcore, we can replace the
list with a simple pointer and get rid of the notion of the
master vcore (and in fact treat every vcore as a master vcore).

We can also get rid of the subcore_vm[] field in the core_info
struct since it is never read.
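
As a rough sketch (with simplified stand-in types, not the kernel's
own definitions), the bookkeeping change amounts to:

	#define MAX_SUBCORES 4
	struct list_head { struct list_head *next, *prev; };
	struct kvmppc_vcore;			/* opaque in this sketch */

	/* Before: each subcore held a list of vcores, and the first
	 * entry on that list acted as the "master vcore". */
	struct core_info_before {
		struct list_head vcs[MAX_SUBCORES];
	};

	/* After: at most one vcore per subcore, so a plain pointer
	 * suffices and every vcore is its own master. */
	struct core_info_after {
		struct kvmppc_vcore *vc[MAX_SUBCORES];
	};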

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h     |  1 -
 arch/powerpc/include/asm/kvm_book3s_asm.h |  2 +-
 arch/powerpc/kvm/book3s_hv.c              | 88 +++++++++++++------------------
 arch/powerpc/kvm/book3s_hv_builtin.c      |  2 +-
 4 files changed, 39 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 2bf35017ffc0..b8d5b8e35244 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -86,7 +86,6 @@ struct kvmppc_vcore {
 	u16 last_cpu;
 	u8 vcore_state;
 	u8 in_guest;
-	struct kvmppc_vcore *master_vcore;
 	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
 	struct list_head preempt_list;
 	spinlock_t lock;
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index b148496ffe36..7cea76f11c26 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -81,7 +81,7 @@ struct kvm_split_mode {
 	u8		subcore_size;
 	u8		do_nap;
 	u8		napped[MAX_SMT_THREADS];
-	struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+	struct kvmppc_vcore *vc[MAX_SUBCORES];
 };
 
 /*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c4ada89be658..03d6c7f9b547 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2171,7 +2171,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
 	struct paca_struct *tpaca;
-	struct kvmppc_vcore *mvc = vc->master_vcore;
 	struct kvm *kvm = vc->kvm;
 
 	cpu = vc->pcpu;
@@ -2181,7 +2180,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 			vcpu->arch.timer_running = 0;
 		}
 		cpu += vcpu->arch.ptid;
-		vcpu->cpu = mvc->pcpu;
+		vcpu->cpu = vc->pcpu;
 		vcpu->arch.thread_cpu = cpu;
 
 		/*
@@ -2207,10 +2206,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 	}
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
-	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+	tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
 	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
 	smp_wmb();
-	tpaca->kvm_hstate.kvm_vcore = mvc;
+	tpaca->kvm_hstate.kvm_vcore = vc;
 	if (cpu != smp_processor_id())
 		kvmppc_ipi_thread(cpu);
 }
@@ -2339,8 +2338,7 @@ struct core_info {
 	int		max_subcore_threads;
 	int		total_threads;
 	int		subcore_threads[MAX_SUBCORES];
-	struct kvm	*subcore_vm[MAX_SUBCORES];
-	struct list_head vcs[MAX_SUBCORES];
+	struct kvmppc_vcore *vc[MAX_SUBCORES];
 };
 
 /*
@@ -2351,17 +2349,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
 
 static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 {
-	int sub;
-
 	memset(cip, 0, sizeof(*cip));
 	cip->n_subcores = 1;
 	cip->max_subcore_threads = vc->num_threads;
 	cip->total_threads = vc->num_threads;
 	cip->subcore_threads[0] = vc->num_threads;
-	cip->subcore_vm[0] = vc->kvm;
-	for (sub = 0; sub < MAX_SUBCORES; ++sub)
-		INIT_LIST_HEAD(&cip->vcs[sub]);
-	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+	cip->vc[0] = vc;
 }
 
 static bool subcore_config_ok(int n_subcores, int n_threads)
@@ -2381,9 +2374,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
 	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
 }
 
-static void init_master_vcore(struct kvmppc_vcore *vc)
+static void init_vcore_to_run(struct kvmppc_vcore *vc)
 {
-	vc->master_vcore = vc;
 	vc->entry_exit_map = 0;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
@@ -2408,9 +2400,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
 	++cip->n_subcores;
 	cip->total_threads += vc->num_threads;
 	cip->subcore_threads[sub] = vc->num_threads;
-	cip->subcore_vm[sub] = vc->kvm;
-	init_master_vcore(vc);
-	list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
+	cip->vc[sub] = vc;
+	init_vcore_to_run(vc);
+	list_del_init(&vc->preempt_list);
 
 	return true;
 }
@@ -2515,7 +2507,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
-	list_del_init(&vc->preempt_list);
 	if (!is_master) {
 		if (still_running > 0) {
 			kvmppc_vcore_preempt(vc);
@@ -2587,7 +2578,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int i;
 	int srcu_idx;
 	struct core_info core_info;
-	struct kvmppc_vcore *pvc, *vcnext;
+	struct kvmppc_vcore *pvc;
 	struct kvm_split_mode split_info, *sip;
 	int split, subcore_size, active;
 	int sub;
@@ -2610,7 +2601,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/*
 	 * Initialize *vc.
 	 */
-	init_master_vcore(vc);
+	init_vcore_to_run(vc);
 	vc->preempt_tb = TB_NIL;
 
 	/*
@@ -2670,9 +2661,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		split_info.ldbar = mfspr(SPRN_LDBAR);
 		split_info.subcore_size = subcore_size;
 		for (sub = 0; sub < core_info.n_subcores; ++sub)
-			split_info.master_vcs[sub] =
-				list_first_entry(&core_info.vcs[sub],
-					struct kvmppc_vcore, preempt_list);
+			split_info.vc[sub] = core_info.vc[sub];
 		/* order writes to split_info before kvm_split_mode pointer */
 		smp_wmb();
 	}
@@ -2704,24 +2693,23 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		thr = subcore_thread_map[sub];
 		thr0_done = false;
 		active |= 1 << thr;
-		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
-			pvc->pcpu = pcpu + thr;
-			for_each_runnable_thread(i, vcpu, pvc) {
-				kvmppc_start_thread(vcpu, pvc);
-				kvmppc_create_dtl_entry(vcpu, pvc);
-				trace_kvm_guest_enter(vcpu);
-				if (!vcpu->arch.ptid)
-					thr0_done = true;
-				active |= 1 << (thr + vcpu->arch.ptid);
-			}
-			/*
-			 * We need to start the first thread of each subcore
-			 * even if it doesn't have a vcpu.
-			 */
-			if (pvc->master_vcore == pvc && !thr0_done)
-				kvmppc_start_thread(NULL, pvc);
-			thr += pvc->num_threads;
+		pvc = core_info.vc[sub];
+		pvc->pcpu = pcpu + thr;
+		for_each_runnable_thread(i, vcpu, pvc) {
+			kvmppc_start_thread(vcpu, pvc);
+			kvmppc_create_dtl_entry(vcpu, pvc);
+			trace_kvm_guest_enter(vcpu);
+			if (!vcpu->arch.ptid)
+				thr0_done = true;
+			active |= 1 << (thr + vcpu->arch.ptid);
 		}
+		/*
+		 * We need to start the first thread of each subcore
+		 * even if it doesn't have a vcpu.
+		 */
+		if (!thr0_done)
+			kvmppc_start_thread(NULL, pvc);
+		thr += pvc->num_threads;
 	}
 
 	/*
@@ -2748,8 +2736,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 0);
 
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
-		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
-			spin_unlock(&pvc->lock);
+		spin_unlock(&core_info.vc[sub]->lock);
 
 	guest_enter();
 
@@ -2802,10 +2789,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	smp_mb();
 	guest_exit();
 
-	for (sub = 0; sub < core_info.n_subcores; ++sub)
-		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
-					 preempt_list)
-			post_guest_process(pvc, pvc == vc);
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		pvc = core_info.vc[sub];
+		post_guest_process(pvc, pvc == vc);
+	}
 
 	spin_lock(&vc->lock);
 	preempt_enable();
@@ -3026,15 +3013,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 */
 	if (!signal_pending(current)) {
 		if (vc->vcore_state == VCORE_PIGGYBACK) {
-			struct kvmppc_vcore *mvc = vc->master_vcore;
-			if (spin_trylock(&mvc->lock)) {
-				if (mvc->vcore_state == VCORE_RUNNING &&
-				    !VCORE_IS_EXITING(mvc)) {
+			if (spin_trylock(&vc->lock)) {
+				if (vc->vcore_state == VCORE_RUNNING &&
+				    !VCORE_IS_EXITING(vc)) {
 					kvmppc_create_dtl_entry(vcpu, vc);
 					kvmppc_start_thread(vcpu, vc);
 					trace_kvm_guest_enter(vcpu);
 				}
-				spin_unlock(&mvc->lock);
+				spin_unlock(&vc->lock);
 			}
 		} else if (vc->vcore_state == VCORE_RUNNING &&
 			   !VCORE_IS_EXITING(vc)) {
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ee4c2558c305..90644db9d38e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
 		return;
 
 	for (i = 0; i < MAX_SUBCORES; ++i) {
-		vc = sip->master_vcs[i];
+		vc = sip->vc[i];
 		if (!vc)
 			break;
 		do {
-- 
2.11.0

* [PATCH 2/2] KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
  2017-06-27  6:07 ` Paul Mackerras
@ 2017-06-27  6:09   ` Paul Mackerras
  -1 siblings, 0 replies; 20+ messages in thread
From: Paul Mackerras @ 2017-06-27  6:09 UTC (permalink / raw)
  To: kvm, kvm-ppc; +Cc: David Gibson

At present, interrupts are hard-disabled fairly late in the guest
entry path, in the assembly code.  Since we check for pending signals
for the tasks running the vCPU(s) earlier in the guest entry path, it is
possible for a signal to be delivered before we enter the guest but
not be noticed until after we exit the guest for some other reason.

Similarly, it is possible for the scheduler to request a reschedule
while we are in the guest entry path, and we won't notice until after
we have run the guest, potentially for a whole timeslice.

To close these races, we need to check for signals and reschedule
requests after hard-disabling interrupts, and then keep interrupts
hard-disabled until we enter the guest.  If there is a signal or a
reschedule request from another CPU, it will send an IPI, which will
cause a guest exit.
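
Condensed from the diff below (back-out details elided), the entry
path now does:

	local_irq_disable();
	hard_irq_disable();
	if (lazy_irq_pending() || need_resched() ||
	    recheck_signals(&core_info)) {
		/* clean up and return without entering the guest */
		local_irq_enable();
		return;
	}
	/* interrupts stay hard-disabled until we enter the guest */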

This puts the interrupt disabling before we call kvmppc_start_thread()
for all the secondary threads of this core that are going to run vCPUs.
The reason for that is that once we have started the secondary threads
there is no easy way to back out without going through at least part
of the guest entry path.  However, kvmppc_start_thread() includes some
code for radix guests which needs to call smp_call_function(), which
must be called with interrupts enabled.  To solve this problem, this
patch moves that code into a separate function that is called earlier.
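
Condensed from the diff below, the relocated code now runs before
interrupts are disabled, while smp_call_function() is still safe:

	pcpu = smp_processor_id();
	if (kvm_is_radix(vc->kvm)) {
		for (sub = 0; sub < core_info.n_subcores; ++sub)
			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
	}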

When the guest exit is caused by an external interrupt, a hypervisor
doorbell or a hypervisor maintenance interrupt, we now handle these
using the replay facility.  __kvmppc_vcore_entry() now returns the
trap number that caused the exit on this thread, and instead of the
assembly code jumping to the handler entry, we return to C code with
interrupts still hard-disabled and set the irq_happened flag in the
PACA, so that when we do local_irq_enable() the appropriate handler
gets called.
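
In outline (condensed from the diff below), the exit path becomes:

	trap = __kvmppc_vcore_entry();	/* trap number for this thread */
	...
	guest_exit();
	trace_hardirqs_off();
	set_irq_happened(trap);	/* sets PACA_IRQ_EE/DBELL/HMI as needed */
	...
	kvmppc_set_host_core(pcpu);
	local_irq_enable();	/* replays the recorded interrupt */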

With all this, we now have the interrupt soft-enable flag clear while
we are in the guest.  This is useful because code in the real-mode
hypercall handlers that checks whether interrupts are enabled will
now see that they are disabled, which is correct, since interrupts
are hard-disabled in the real-mode code.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c            | 140 +++++++++++++++++++++++++-------
 arch/powerpc/kvm/book3s_hv_interrupts.S |   8 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  49 ++---------
 3 files changed, 118 insertions(+), 79 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 03d6c7f9b547..c6313c5d331c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -647,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	unsigned long stolen;
 	unsigned long core_stolen;
 	u64 now;
+	unsigned long flags;
 
 	dt = vcpu->arch.dtl_ptr;
 	vpa = vcpu->arch.vpa.pinned_addr;
@@ -654,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	core_stolen = vcore_stolen_time(vc, now);
 	stolen = core_stolen - vcpu->arch.stolen_logged;
 	vcpu->arch.stolen_logged = core_stolen;
-	spin_lock_irq(&vcpu->arch.tbacct_lock);
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	stolen += vcpu->arch.busy_stolen;
 	vcpu->arch.busy_stolen = 0;
-	spin_unlock_irq(&vcpu->arch.tbacct_lock);
+	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 	if (!dt || !vpa)
 		return;
 	memset(dt, 0, sizeof(struct dtl_entry));
@@ -2085,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 	}
 }
 
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
@@ -2167,6 +2168,31 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
 }
 
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * With radix, the guest can do TLB invalidations itself,
+	 * and it could choose to use the local form (tlbiel) if
+	 * it is invalidating a translation that has only ever been
+	 * used on one vcpu.  However, that doesn't mean it has
+	 * only ever been used on one physical cpu, since vcpus
+	 * can move around between pcpus.  To cope with this, when
+	 * a vcpu moves from one pcpu to another, we need to tell
+	 * any vcpus running on the same core as this vcpu previously
+	 * ran to flush the TLB.  The TLB is shared between threads,
+	 * so we use a single bit in .need_tlb_flush for all 4 threads.
+	 */
+	if (vcpu->arch.prev_cpu != pcpu) {
+		if (vcpu->arch.prev_cpu >= 0 &&
+		    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+		    cpu_first_thread_sibling(pcpu))
+			radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+		vcpu->arch.prev_cpu = pcpu;
+	}
+}
+
 static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
@@ -2182,26 +2208,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 		cpu += vcpu->arch.ptid;
 		vcpu->cpu = vc->pcpu;
 		vcpu->arch.thread_cpu = cpu;
-
-		/*
-		 * With radix, the guest can do TLB invalidations itself,
-		 * and it could choose to use the local form (tlbiel) if
-		 * it is invalidating a translation that has only ever been
-		 * used on one vcpu.  However, that doesn't mean it has
-		 * only ever been used on one physical cpu, since vcpus
-		 * can move around between pcpus.  To cope with this, when
-		 * a vcpu moves from one pcpu to another, we need to tell
-		 * any vcpus running on the same core as this vcpu previously
-		 * ran to flush the TLB.  The TLB is shared between threads,
-		 * so we use a single bit in .need_tlb_flush for all 4 threads.
-		 */
-		if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
-			if (vcpu->arch.prev_cpu >= 0 &&
-			    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
-			    cpu_first_thread_sibling(cpu))
-				radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
-			vcpu->arch.prev_cpu = cpu;
-		}
 		cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
 	}
 	tpaca = &paca[cpu];
@@ -2470,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 	spin_unlock(&lp->lock);
 }
 
+static bool recheck_signals(struct core_info *cip)
+{
+	int sub, i;
+	struct kvm_vcpu *vcpu;
+
+	for (sub = 0; sub < cip->n_subcores; ++sub)
+		for_each_runnable_thread(i, vcpu, cip->vc[sub])
+			if (signal_pending(vcpu->arch.run_task))
+				return true;
+	return false;
+}
+
 static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
 	int still_running = 0, i;
@@ -2568,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
 	return 0;
 }
 
+static void set_irq_happened(int trap)
+{
+	switch (trap) {
+	case BOOK3S_INTERRUPT_EXTERNAL:
+		local_paca->irq_happened |= PACA_IRQ_EE;
+		break;
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+		local_paca->irq_happened |= PACA_IRQ_DBELL;
+		break;
+	case BOOK3S_INTERRUPT_HMI:
+		local_paca->irq_happened |= PACA_IRQ_HMI;
+		break;
+	}
+}
+
 /*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
@@ -2587,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int pcpu, thr;
 	int target_threads;
 	int controlled_threads;
+	int trap;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -2638,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	if (vc->num_threads < target_threads)
 		collect_piggybacks(&core_info, target_threads);
 
+	/*
+	 * On radix, arrange for TLB flushing if necessary.
+	 * This has to be done before disabling interrupts since
+	 * it uses smp_call_function().
+	 */
+	pcpu = smp_processor_id();
+	if (kvm_is_radix(vc->kvm)) {
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+	}
+
+	/*
+	 * Hard-disable interrupts, and check resched flag and signals.
+	 * If we need to reschedule or deliver a signal, clean up
+	 * and return without going into the guest(s).
+	 */
+	local_irq_disable();
+	hard_irq_disable();
+	if (lazy_irq_pending() || need_resched() ||
+	    recheck_signals(&core_info)) {
+		local_irq_enable();
+		vc->vcore_state = VCORE_INACTIVE;
+		/* Unlock all except the primary vcore */
+		for (sub = 1; sub < core_info.n_subcores; ++sub) {
+			pvc = core_info.vc[sub];
+			/* Put back on to the preempted vcores list */
+			kvmppc_vcore_preempt(pvc);
+			spin_unlock(&pvc->lock);
+		}
+		for (i = 0; i < controlled_threads; ++i)
+			kvmppc_release_hwthread(pcpu + i);
+		return;
+	}
+
+	kvmppc_clear_host_core(pcpu);
+
 	/* Decide on micro-threading (split-core) mode */
 	subcore_size = threads_per_subcore;
 	cmd_bit = stat_bit = 0;
@@ -2665,7 +2736,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		/* order writes to split_info before kvm_split_mode pointer */
 		smp_wmb();
 	}
-	pcpu = smp_processor_id();
 	for (thr = 0; thr < controlled_threads; ++thr)
 		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
 
@@ -2685,8 +2755,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		}
 	}
 
-	kvmppc_clear_host_core(pcpu);
-
 	/* Start all the threads */
 	active = 0;
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2738,14 +2806,25 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		spin_unlock(&core_info.vc[sub]->lock);
 
+	/*
+	 * Interrupts will be enabled once we get into the guest,
+	 * so tell lockdep that we're about to enable interrupts.
+	 */
+	trace_hardirqs_on();
+
 	guest_enter();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
-	__kvmppc_vcore_entry();
+	trap = __kvmppc_vcore_entry();
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
+	guest_exit();
+
+	trace_hardirqs_off();
+	set_irq_happened(trap);
+
 	spin_lock(&vc->lock);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
@@ -2773,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		split_info.do_nap = 0;
 	}
 
+	kvmppc_set_host_core(pcpu);
+
+	local_irq_enable();
+
 	/* Let secondaries go back to the offline loop */
 	for (i = 0; i < controlled_threads; ++i) {
 		kvmppc_release_hwthread(pcpu + i);
@@ -2781,13 +2864,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
 	}
 
-	kvmppc_set_host_core(pcpu);
-
 	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
-	guest_exit();
 
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
 		pvc = core_info.vc[sub];
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 404deb512844..dc54373c8780 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
 	std	r3, HSTATE_DABR(r13)
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
-	/* Hard-disable interrupts */
-	mfmsr   r10
-	std	r10, HSTATE_HOST_MSR(r13)
-	rldicl  r10,r10,48,1
-	rotldi  r10,r10,16
-	mtmsrd  r10,1
-
 	/* Save host PMU registers */
 BEGIN_FTR_SECTION
 	/* Work around P8 PMAE bug */
@@ -153,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	 *
 	 * R1       = host R1
 	 * R2       = host R2
+	 * R3       = trap number on this thread
 	 * R12      = exit handler id
 	 * R13      = PACA
 	 */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e3793bd510fe..8a3b10d42b2a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -69,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	std	r0, PPC_LR_STKOFF(r1)
 	stdu	r1, -112(r1)
 	mfmsr	r10
+	std	r10, HSTATE_HOST_MSR(r13)
 	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
 	li	r0,MSR_RI
 	andc	r0,r10,r0
@@ -165,6 +166,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	addi	r1, r1, 112
 	ld	r7, HSTATE_HOST_MSR(r13)
 
+	/* Return the trap number on this thread as the return value */
+	mr	r3, r12
+
 	/*
 	 * If we came back from the guest via a relocation-on interrupt,
 	 * we will be in virtual mode at this point, which makes it a
@@ -174,59 +178,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	andi.	r0, r0, MSR_IR		/* in real mode? */
 	bne	.Lvirt_return
 
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	11f
-	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
-	beq 	15f	/* Invoke the H_DOORBELL handler */
-	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
-	beq	cr2, 14f			/* HMI check */
-
-	/* RFI into the highmem handler, or branch to interrupt handler */
+	/* RFI into the highmem handler */
 	mfmsr	r6
 	li	r0, MSR_RI
 	andc	r6, r6, r0
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	/*
-	 * BOOK3S_INTERRUPT_MACHINE_CHECK is handled at the
-	 * time of guest exit
-	 */
 	RFI
 
-	/* On POWER7, we have external interrupts set to use HSRR0/1 */
-11:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	ba	0x500
-
-14:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	b	hmi_exception_after_realmode
-
-15:	mtspr SPRN_HSRR0, r8
-	mtspr SPRN_HSRR1, r7
-	ba    0xe80
-
-	/* Virtual-mode return - can't get here for HMI or machine check */
+	/* Virtual-mode return */
 .Lvirt_return:
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	16f
-	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
-	beq	17f
-	andi.	r0, r7, MSR_EE		/* were interrupts hard-enabled? */
-	beq	18f
-	mtmsrd	r7, 1			/* if so then re-enable them */
-18:	mtlr	r8
+	mtlr	r8
 	blr
 
-16:	mtspr	SPRN_HSRR0, r8		/* jump to reloc-on external vector */
-	mtspr	SPRN_HSRR1, r7
-	b	exc_virt_0x4500_hardware_interrupt
-
-17:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	b	exc_virt_0x4e80_h_doorbell
-
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
-- 
2.11.0

* Re: [PATCH 1/2] KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
  2017-06-27  6:07 ` Paul Mackerras
@ 2017-06-30  4:54   ` David Gibson
  -1 siblings, 0 replies; 20+ messages in thread
From: David Gibson @ 2017-06-30  4:54 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc

On Tue, Jun 27, 2017 at 04:07:56PM +1000, Paul Mackerras wrote:
> Since commit b009031f74da ("KVM: PPC: Book3S HV: Take out virtual
> core piggybacking code", 2016-09-15), we only have at most one
> vcore per subcore.  Previously, the fact that there might be more
> than one vcore per subcore meant that we had the notion of a
> "master vcore", which was the vcore that controlled thread 0 of
> the subcore.  We also needed a list per subcore in the core_info
> struct to record which vcores belonged to each subcore.  Now that
> there can only be one vcore in the subcore, we can replace the
> list with a simple pointer and get rid of the notion of the
> master vcore (and in fact treat every vcore as a master vcore).
> 
> We can also get rid of the subcore_vm[] field in the core_info
> struct since it is never read.
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  arch/powerpc/include/asm/kvm_book3s.h     |  1 -
>  arch/powerpc/include/asm/kvm_book3s_asm.h |  2 +-
>  arch/powerpc/kvm/book3s_hv.c              | 88 +++++++++++++------------------
>  arch/powerpc/kvm/book3s_hv_builtin.c      |  2 +-
>  4 files changed, 39 insertions(+), 54 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 2bf35017ffc0..b8d5b8e35244 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -86,7 +86,6 @@ struct kvmppc_vcore {
>  	u16 last_cpu;
>  	u8 vcore_state;
>  	u8 in_guest;
> -	struct kvmppc_vcore *master_vcore;
>  	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
>  	struct list_head preempt_list;
>  	spinlock_t lock;
> diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
> index b148496ffe36..7cea76f11c26 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> @@ -81,7 +81,7 @@ struct kvm_split_mode {
>  	u8		subcore_size;
>  	u8		do_nap;
>  	u8		napped[MAX_SMT_THREADS];
> -	struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
> +	struct kvmppc_vcore *vc[MAX_SUBCORES];
>  };
>  
>  /*
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index c4ada89be658..03d6c7f9b547 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2171,7 +2171,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
>  {
>  	int cpu;
>  	struct paca_struct *tpaca;
> -	struct kvmppc_vcore *mvc = vc->master_vcore;
>  	struct kvm *kvm = vc->kvm;
>  
>  	cpu = vc->pcpu;
> @@ -2181,7 +2180,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
>  			vcpu->arch.timer_running = 0;
>  		}
>  		cpu += vcpu->arch.ptid;
> -		vcpu->cpu = mvc->pcpu;
> +		vcpu->cpu = vc->pcpu;
>  		vcpu->arch.thread_cpu = cpu;
>  
>  		/*
> @@ -2207,10 +2206,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
>  	}
>  	tpaca = &paca[cpu];
>  	tpaca->kvm_hstate.kvm_vcpu = vcpu;
> -	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
> +	tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
>  	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
>  	smp_wmb();
> -	tpaca->kvm_hstate.kvm_vcore = mvc;
> +	tpaca->kvm_hstate.kvm_vcore = vc;
>  	if (cpu != smp_processor_id())
>  		kvmppc_ipi_thread(cpu);
>  }
> @@ -2339,8 +2338,7 @@ struct core_info {
>  	int		max_subcore_threads;
>  	int		total_threads;
>  	int		subcore_threads[MAX_SUBCORES];
> -	struct kvm	*subcore_vm[MAX_SUBCORES];
> -	struct list_head vcs[MAX_SUBCORES];
> +	struct kvmppc_vcore *vc[MAX_SUBCORES];
>  };
>  
>  /*
> @@ -2351,17 +2349,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
>  
>  static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
>  {
> -	int sub;
> -
>  	memset(cip, 0, sizeof(*cip));
>  	cip->n_subcores = 1;
>  	cip->max_subcore_threads = vc->num_threads;
>  	cip->total_threads = vc->num_threads;
>  	cip->subcore_threads[0] = vc->num_threads;
> -	cip->subcore_vm[0] = vc->kvm;
> -	for (sub = 0; sub < MAX_SUBCORES; ++sub)
> -		INIT_LIST_HEAD(&cip->vcs[sub]);
> -	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
> +	cip->vc[0] = vc;
>  }
>  
>  static bool subcore_config_ok(int n_subcores, int n_threads)
> @@ -2381,9 +2374,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
>  	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
>  }
>  
> -static void init_master_vcore(struct kvmppc_vcore *vc)
> +static void init_vcore_to_run(struct kvmppc_vcore *vc)
>  {
> -	vc->master_vcore = vc;
>  	vc->entry_exit_map = 0;
>  	vc->in_guest = 0;
>  	vc->napping_threads = 0;
> @@ -2408,9 +2400,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
>  	++cip->n_subcores;
>  	cip->total_threads += vc->num_threads;
>  	cip->subcore_threads[sub] = vc->num_threads;
> -	cip->subcore_vm[sub] = vc->kvm;
> -	init_master_vcore(vc);
> -	list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
> +	cip->vc[sub] = vc;
> +	init_vcore_to_run(vc);
> +	list_del_init(&vc->preempt_list);
>  
>  	return true;
>  }
> @@ -2515,7 +2507,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
>  			wake_up(&vcpu->arch.cpu_run);
>  		}
>  	}
> -	list_del_init(&vc->preempt_list);
>  	if (!is_master) {
>  		if (still_running > 0) {
>  			kvmppc_vcore_preempt(vc);
> @@ -2587,7 +2578,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  	int i;
>  	int srcu_idx;
>  	struct core_info core_info;
> -	struct kvmppc_vcore *pvc, *vcnext;
> +	struct kvmppc_vcore *pvc;
>  	struct kvm_split_mode split_info, *sip;
>  	int split, subcore_size, active;
>  	int sub;
> @@ -2610,7 +2601,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  	/*
>  	 * Initialize *vc.
>  	 */
> -	init_master_vcore(vc);
> +	init_vcore_to_run(vc);
>  	vc->preempt_tb = TB_NIL;
>  
>  	/*
> @@ -2670,9 +2661,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  		split_info.ldbar = mfspr(SPRN_LDBAR);
>  		split_info.subcore_size = subcore_size;
>  		for (sub = 0; sub < core_info.n_subcores; ++sub)
> -			split_info.master_vcs[sub] =
> -				list_first_entry(&core_info.vcs[sub],
> -					struct kvmppc_vcore, preempt_list);
> +			split_info.vc[sub] = core_info.vc[sub];
>  		/* order writes to split_info before kvm_split_mode pointer */
>  		smp_wmb();
>  	}
> @@ -2704,24 +2693,23 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  		thr = subcore_thread_map[sub];
>  		thr0_done = false;
>  		active |= 1 << thr;
> -		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
> -			pvc->pcpu = pcpu + thr;
> -			for_each_runnable_thread(i, vcpu, pvc) {
> -				kvmppc_start_thread(vcpu, pvc);
> -				kvmppc_create_dtl_entry(vcpu, pvc);
> -				trace_kvm_guest_enter(vcpu);
> -				if (!vcpu->arch.ptid)
> -					thr0_done = true;
> -				active |= 1 << (thr + vcpu->arch.ptid);
> -			}
> -			/*
> -			 * We need to start the first thread of each subcore
> -			 * even if it doesn't have a vcpu.
> -			 */
> -			if (pvc->master_vcore == pvc && !thr0_done)
> -				kvmppc_start_thread(NULL, pvc);
> -			thr += pvc->num_threads;
> +		pvc = core_info.vc[sub];
> +		pvc->pcpu = pcpu + thr;
> +		for_each_runnable_thread(i, vcpu, pvc) {
> +			kvmppc_start_thread(vcpu, pvc);
> +			kvmppc_create_dtl_entry(vcpu, pvc);
> +			trace_kvm_guest_enter(vcpu);
> +			if (!vcpu->arch.ptid)
> +				thr0_done = true;
> +			active |= 1 << (thr + vcpu->arch.ptid);
>  		}
> +		/*
> +		 * We need to start the first thread of each subcore
> +		 * even if it doesn't have a vcpu.
> +		 */
> +		if (!thr0_done)
> +			kvmppc_start_thread(NULL, pvc);
> +		thr += pvc->num_threads;
>  	}
>  
>  	/*
> @@ -2748,8 +2736,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  	trace_kvmppc_run_core(vc, 0);
>  
>  	for (sub = 0; sub < core_info.n_subcores; ++sub)
> -		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
> -			spin_unlock(&pvc->lock);
> +		spin_unlock(&core_info.vc[sub]->lock);
>  
>  	guest_enter();
>  
> @@ -2802,10 +2789,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  	smp_mb();
>  	guest_exit();
>  
> -	for (sub = 0; sub < core_info.n_subcores; ++sub)
> -		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
> -					 preempt_list)
> -			post_guest_process(pvc, pvc == vc);
> +	for (sub = 0; sub < core_info.n_subcores; ++sub) {
> +		pvc = core_info.vc[sub];
> +		post_guest_process(pvc, pvc == vc);
> +	}
>  
>  	spin_lock(&vc->lock);
>  	preempt_enable();
> @@ -3026,15 +3013,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  	 */
>  	if (!signal_pending(current)) {
>  		if (vc->vcore_state == VCORE_PIGGYBACK) {
> -			struct kvmppc_vcore *mvc = vc->master_vcore;
> -			if (spin_trylock(&mvc->lock)) {
> -				if (mvc->vcore_state == VCORE_RUNNING &&
> -				    !VCORE_IS_EXITING(mvc)) {
> +			if (spin_trylock(&vc->lock)) {
> +				if (vc->vcore_state == VCORE_RUNNING &&
> +				    !VCORE_IS_EXITING(vc)) {
>  					kvmppc_create_dtl_entry(vcpu, vc);
>  					kvmppc_start_thread(vcpu, vc);
>  					trace_kvm_guest_enter(vcpu);
>  				}
> -				spin_unlock(&mvc->lock);
> +				spin_unlock(&vc->lock);
>  			}
>  		} else if (vc->vcore_state == VCORE_RUNNING &&
>  			   !VCORE_IS_EXITING(vc)) {
> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
> index ee4c2558c305..90644db9d38e 100644
> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
> @@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
>  		return;
>  
>  	for (i = 0; i < MAX_SUBCORES; ++i) {
> -		vc = sip->master_vcs[i];
> +		vc = sip->vc[i];
>  		if (!vc)
>  			break;
>  		do {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 2/2] KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
  2017-06-27  6:09   ` Paul Mackerras
@ 2017-06-30  7:03     ` Paul Mackerras
  -1 siblings, 0 replies; 20+ messages in thread
From: Paul Mackerras @ 2017-06-30  7:03 UTC (permalink / raw)
  To: kvm, kvm-ppc; +Cc: David Gibson

At present, interrupts are hard-disabled fairly late in the guest
entry path, in the assembly code.  Since we check for pending signals
on each vCPU's task earlier in the guest entry path, it is
possible for a signal to be delivered before we enter the guest but
not be noticed until after we exit the guest for some other reason.

Similarly, it is possible for the scheduler to request a reschedule
while we are in the guest entry path, and we won't notice until after
we have run the guest, potentially for a whole timeslice.

Furthermore, with a radix guest on POWER9, we can take the interrupt
with the MMU on.  In this case we end up leaving interrupts
hard-disabled after the guest exit, and they are likely to stay
hard-disabled until we exit to userspace or context-switch to
another process.  This was masking the fact that we were also not
setting the RI (recoverable interrupt) bit in the MSR, meaning
that if we had taken an interrupt, it would have crashed the host
kernel with an unrecoverable interrupt message.

To close these races, we need to check for signals and reschedule
requests after hard-disabling interrupts, and then keep interrupts
hard-disabled until we enter the guest.  If there is a signal or a
reschedule request from another CPU, it will send an IPI, which will
cause a guest exit.
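
In outline, the entry path now does this (a condensed sketch of the
code added by this patch; see the diff below for the full version):

	local_irq_disable();
	hard_irq_disable();
	if (lazy_irq_pending() || need_resched() ||
	    recheck_signals(&core_info)) {
		/* Bail out: nothing has entered the guest yet */
		local_irq_enable();
		vc->vcore_state = VCORE_INACTIVE;
		/* ... requeue and unlock the other vcores,
		   release the hardware threads ... */
		return;
	}
	/* interrupts stay hard-disabled from here until guest entry */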

This puts the interrupt disabling before we call kvmppc_start_thread()
for all the secondary threads of this core that are going to run vCPUs.
The reason is that once we have started the secondary threads there
is no easy way to back out without going through at least part of
the guest entry path.  However, kvmppc_start_thread() includes some
code for radix guests which needs to call smp_call_function(), which
must be called with interrupts enabled.  To solve this problem, this
patch moves that code into a separate function that is called earlier,
as sketched below.
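
Concretely, the radix TLB-flush check now runs while interrupts are
still enabled, before the hard-disable and recheck sketched above
(again condensed from the diff below):

	/* kvmppc_prepare_radix_vcpu() may use smp_call_function() */
	if (kvm_is_radix(vc->kvm)) {
		for (sub = 0; sub < core_info.n_subcores; ++sub)
			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
	}
	local_irq_disable();
	hard_irq_disable();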

When the guest exit is caused by an external interrupt, a hypervisor
doorbell or a hypervisor maintenance interrupt, we now handle these
exits using the replay facility.  __kvmppc_vcore_entry() now returns
the trap number that caused the exit on this thread; instead of the
assembly code jumping to the handler entry, we return to C code with
interrupts still hard-disabled and set the irq_happened flag in the
PACA, so that when we do local_irq_enable() the appropriate handler
gets called.
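
The resulting C-level flow around guest entry is roughly this (a
sketch; the diff below has the full ordering):

	trap = __kvmppc_vcore_entry();	/* returns this thread's trap */
	guest_exit();
	set_irq_happened(trap);		/* note EE/doorbell/HMI in the PACA */
	/* ... exit processing, still hard-disabled ... */
	kvmppc_set_host_core(pcpu);
	local_irq_enable();		/* pending handler gets replayed here */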

With all this, we now have the interrupt soft-enable flag clear while
we are in the guest.  This is useful because code in the real-mode
hypercall handlers that checks whether interrupts are enabled will
now see that they are disabled, which is correct, since interrupts
are hard-disabled in the real-mode code.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
v2: Set MSR_RI in the guest exit path, to avoid getting unrecoverable
interrupt messages.

 arch/powerpc/kvm/book3s_hv.c            | 140 +++++++++++++++++++++++++-------
 arch/powerpc/kvm/book3s_hv_interrupts.S |   8 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  58 ++++---------
 3 files changed, 127 insertions(+), 79 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 03d6c7f9b547..c6313c5d331c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -647,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	unsigned long stolen;
 	unsigned long core_stolen;
 	u64 now;
+	unsigned long flags;
 
 	dt = vcpu->arch.dtl_ptr;
 	vpa = vcpu->arch.vpa.pinned_addr;
@@ -654,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	core_stolen = vcore_stolen_time(vc, now);
 	stolen = core_stolen - vcpu->arch.stolen_logged;
 	vcpu->arch.stolen_logged = core_stolen;
-	spin_lock_irq(&vcpu->arch.tbacct_lock);
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	stolen += vcpu->arch.busy_stolen;
 	vcpu->arch.busy_stolen = 0;
-	spin_unlock_irq(&vcpu->arch.tbacct_lock);
+	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 	if (!dt || !vpa)
 		return;
 	memset(dt, 0, sizeof(struct dtl_entry));
@@ -2085,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 	}
 }
 
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
@@ -2167,6 +2168,31 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
 }
 
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * With radix, the guest can do TLB invalidations itself,
+	 * and it could choose to use the local form (tlbiel) if
+	 * it is invalidating a translation that has only ever been
+	 * used on one vcpu.  However, that doesn't mean it has
+	 * only ever been used on one physical cpu, since vcpus
+	 * can move around between pcpus.  To cope with this, when
+	 * a vcpu moves from one pcpu to another, we need to tell
+	 * any vcpus running on the same core as this vcpu previously
+	 * ran to flush the TLB.  The TLB is shared between threads,
+	 * so we use a single bit in .need_tlb_flush for all 4 threads.
+	 */
+	if (vcpu->arch.prev_cpu != pcpu) {
+		if (vcpu->arch.prev_cpu >= 0 &&
+		    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+		    cpu_first_thread_sibling(pcpu))
+			radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+		vcpu->arch.prev_cpu = pcpu;
+	}
+}
+
 static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
@@ -2182,26 +2208,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 		cpu += vcpu->arch.ptid;
 		vcpu->cpu = vc->pcpu;
 		vcpu->arch.thread_cpu = cpu;
-
-		/*
-		 * With radix, the guest can do TLB invalidations itself,
-		 * and it could choose to use the local form (tlbiel) if
-		 * it is invalidating a translation that has only ever been
-		 * used on one vcpu.  However, that doesn't mean it has
-		 * only ever been used on one physical cpu, since vcpus
-		 * can move around between pcpus.  To cope with this, when
-		 * a vcpu moves from one pcpu to another, we need to tell
-		 * any vcpus running on the same core as this vcpu previously
-		 * ran to flush the TLB.  The TLB is shared between threads,
-		 * so we use a single bit in .need_tlb_flush for all 4 threads.
-		 */
-		if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
-			if (vcpu->arch.prev_cpu >= 0 &&
-			    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
-			    cpu_first_thread_sibling(cpu))
-				radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
-			vcpu->arch.prev_cpu = cpu;
-		}
 		cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
 	}
 	tpaca = &paca[cpu];
@@ -2470,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 	spin_unlock(&lp->lock);
 }
 
+static bool recheck_signals(struct core_info *cip)
+{
+	int sub, i;
+	struct kvm_vcpu *vcpu;
+
+	for (sub = 0; sub < cip->n_subcores; ++sub)
+		for_each_runnable_thread(i, vcpu, cip->vc[sub])
+			if (signal_pending(vcpu->arch.run_task))
+				return true;
+	return false;
+}
+
 static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
 	int still_running = 0, i;
@@ -2568,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
 	return 0;
 }
 
+static void set_irq_happened(int trap)
+{
+	switch (trap) {
+	case BOOK3S_INTERRUPT_EXTERNAL:
+		local_paca->irq_happened |= PACA_IRQ_EE;
+		break;
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+		local_paca->irq_happened |= PACA_IRQ_DBELL;
+		break;
+	case BOOK3S_INTERRUPT_HMI:
+		local_paca->irq_happened |= PACA_IRQ_HMI;
+		break;
+	}
+}
+
 /*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
@@ -2587,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int pcpu, thr;
 	int target_threads;
 	int controlled_threads;
+	int trap;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -2638,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	if (vc->num_threads < target_threads)
 		collect_piggybacks(&core_info, target_threads);
 
+	/*
+	 * On radix, arrange for TLB flushing if necessary.
+	 * This has to be done before disabling interrupts since
+	 * it uses smp_call_function().
+	 */
+	pcpu = smp_processor_id();
+	if (kvm_is_radix(vc->kvm)) {
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+	}
+
+	/*
+	 * Hard-disable interrupts, and check resched flag and signals.
+	 * If we need to reschedule or deliver a signal, clean up
+	 * and return without going into the guest(s).
+	 */
+	local_irq_disable();
+	hard_irq_disable();
+	if (lazy_irq_pending() || need_resched() ||
+	    recheck_signals(&core_info)) {
+		local_irq_enable();
+		vc->vcore_state = VCORE_INACTIVE;
+		/* Unlock all except the primary vcore */
+		for (sub = 1; sub < core_info.n_subcores; ++sub) {
+			pvc = core_info.vc[sub];
+			/* Put back on to the preempted vcores list */
+			kvmppc_vcore_preempt(pvc);
+			spin_unlock(&pvc->lock);
+		}
+		for (i = 0; i < controlled_threads; ++i)
+			kvmppc_release_hwthread(pcpu + i);
+		return;
+	}
+
+	kvmppc_clear_host_core(pcpu);
+
 	/* Decide on micro-threading (split-core) mode */
 	subcore_size = threads_per_subcore;
 	cmd_bit = stat_bit = 0;
@@ -2665,7 +2736,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		/* order writes to split_info before kvm_split_mode pointer */
 		smp_wmb();
 	}
-	pcpu = smp_processor_id();
 	for (thr = 0; thr < controlled_threads; ++thr)
 		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
 
@@ -2685,8 +2755,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		}
 	}
 
-	kvmppc_clear_host_core(pcpu);
-
 	/* Start all the threads */
 	active = 0;
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2738,14 +2806,25 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		spin_unlock(&core_info.vc[sub]->lock);
 
+	/*
+	 * Interrupts will be enabled once we get into the guest,
+	 * so tell lockdep that we're about to enable interrupts.
+	 */
+	trace_hardirqs_on();
+
 	guest_enter();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
-	__kvmppc_vcore_entry();
+	trap = __kvmppc_vcore_entry();
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
+	guest_exit();
+
+	trace_hardirqs_off();
+	set_irq_happened(trap);
+
 	spin_lock(&vc->lock);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
@@ -2773,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		split_info.do_nap = 0;
 	}
 
+	kvmppc_set_host_core(pcpu);
+
+	local_irq_enable();
+
 	/* Let secondaries go back to the offline loop */
 	for (i = 0; i < controlled_threads; ++i) {
 		kvmppc_release_hwthread(pcpu + i);
@@ -2781,13 +2864,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
 	}
 
-	kvmppc_set_host_core(pcpu);
-
 	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
-	guest_exit();
 
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
 		pvc = core_info.vc[sub];
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 404deb512844..dc54373c8780 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
 	std	r3, HSTATE_DABR(r13)
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
-	/* Hard-disable interrupts */
-	mfmsr   r10
-	std	r10, HSTATE_HOST_MSR(r13)
-	rldicl  r10,r10,48,1
-	rotldi  r10,r10,16
-	mtmsrd  r10,1
-
 	/* Save host PMU registers */
 BEGIN_FTR_SECTION
 	/* Work around P8 PMAE bug */
@@ -153,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	 *
 	 * R1       = host R1
 	 * R2       = host R2
+	 * R3       = trap number on this thread
 	 * R12      = exit handler id
 	 * R13      = PACA
 	 */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e3793bd510fe..6ea4b53f4b16 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -69,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	std	r0, PPC_LR_STKOFF(r1)
 	stdu	r1, -112(r1)
 	mfmsr	r10
+	std	r10, HSTATE_HOST_MSR(r13)
 	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
 	li	r0,MSR_RI
 	andc	r0,r10,r0
@@ -165,6 +166,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	addi	r1, r1, 112
 	ld	r7, HSTATE_HOST_MSR(r13)
 
+	/* Return the trap number on this thread as the return value */
+	mr	r3, r12
+
 	/*
 	 * If we came back from the guest via a relocation-on interrupt,
 	 * we will be in virtual mode at this point, which makes it a
@@ -174,59 +178,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	andi.	r0, r0, MSR_IR		/* in real mode? */
 	bne	.Lvirt_return
 
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	11f
-	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
-	beq 	15f	/* Invoke the H_DOORBELL handler */
-	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
-	beq	cr2, 14f			/* HMI check */
-
-	/* RFI into the highmem handler, or branch to interrupt handler */
+	/* RFI into the highmem handler */
 	mfmsr	r6
 	li	r0, MSR_RI
 	andc	r6, r6, r0
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	/*
-	 * BOOK3S_INTERRUPT_MACHINE_CHECK is handled at the
-	 * time of guest exit
-	 */
 	RFI
 
-	/* On POWER7, we have external interrupts set to use HSRR0/1 */
-11:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	ba	0x500
-
-14:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	b	hmi_exception_after_realmode
-
-15:	mtspr SPRN_HSRR0, r8
-	mtspr SPRN_HSRR1, r7
-	ba    0xe80
-
-	/* Virtual-mode return - can't get here for HMI or machine check */
+	/* Virtual-mode return */
 .Lvirt_return:
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	16f
-	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
-	beq	17f
-	andi.	r0, r7, MSR_EE		/* were interrupts hard-enabled? */
-	beq	18f
-	mtmsrd	r7, 1			/* if so then re-enable them */
-18:	mtlr	r8
+	mtlr	r8
 	blr
 
-16:	mtspr	SPRN_HSRR0, r8		/* jump to reloc-on external vector */
-	mtspr	SPRN_HSRR1, r7
-	b	exc_virt_0x4500_hardware_interrupt
-
-17:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	b	exc_virt_0x4e80_h_doorbell
-
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -1258,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	stw	r12,VCPU_TRAP(r9)
 
+	/*
+	 * Now that we have saved away SRR0/1 and HSRR0/1,
+	 * interrupts are recoverable in principle, so set MSR_RI.
+	 * This becomes important for relocation-on interrupts from
+	 * the guest, which we can get in radix mode on POWER9.
+	 */
+	li	r0, MSR_RI
+	mtmsrd	r0, 1
+
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r9, VCPU_TB_RMINTR
 	mr	r4, r9
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/2] KVM: PPC: Book3S HV: Simplify machine check handling
  2017-06-27  6:07 ` Paul Mackerras
@ 2019-02-20  1:05 ` Paul Mackerras
  -1 siblings, 0 replies; 20+ messages in thread
From: Paul Mackerras @ 2019-02-20  1:05 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc

This makes the handling of machine check interrupts that occur inside
a guest simpler and more robust, with less done in assembler code and
in real mode.

Now, when a machine check occurs inside a guest, we always get the
machine check event struct and put a copy in the vcpu struct for the
vcpu where the machine check occurred.  We no longer call
machine_check_queue_event() from kvmppc_realmode_mc_power7(), because
on POWER8 the vcpu may be running on an offline secondary thread, and
in that case machine_check_queue_event() calls irq_work_queue(),
which doesn't work because the CPU is offline; instead it triggers
the WARN_ON(lazy_irq_pending()) in pnv_smp_cpu_kill_self() (which
fires again and again because nothing clears the condition).
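
Instead, the real-mode code now just stashes the event in the vcpu
struct (condensed from the book3s_hv_ras.c diff below):

	if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
		if (handled && mce_evt.version == MCE_V1)
			mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
	} else {
		memset(&mce_evt, 0, sizeof(mce_evt));
	}
	vcpu->arch.mce_evt = mce_evt;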

All that machine_check_queue_event() actually does is to cause the
event to be printed to the console.  For a machine check occurring in
the guest, we now print the event in kvmppc_handle_exit_hv()
instead.

The assembly code at label machine_check_realmode now just calls C
code and then continues exiting the guest.  We no longer either
synthesize a machine check for the guest in assembly code or return
to the guest without a machine check.

The code in kvmppc_handle_exit_hv() is extended to handle the case
where the guest is not FWNMI-capable.  In that case we now always
synthesize a machine check interrupt for the guest.  Previously, if
the host thought it had recovered the machine check fully, it would
return to the guest without any notification that the machine check
had occurred.  If the machine check was caused by some action of the
guest (such as creating duplicate SLB entries), it is much better to
tell the guest that it has caused a problem.  Therefore we now always
generate a machine check interrupt for guests that are not
FWNMI-capable.
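
In essence, the machine-check case in kvmppc_handle_exit_hv() becomes
(condensed from the diff below):

	case BOOK3S_INTERRUPT_MACHINE_CHECK:
		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
		if (!vcpu->kvm->arch.fwnmi_enabled) {
			/* synthesize a machine check for the guest */
			ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
			kvmppc_core_queue_machine_check(vcpu, flags);
			r = RESUME_GUEST;
			break;
		}
		/* FWNMI-capable guest: exit to userspace as KVM_EXIT_NMI */
		run->exit_reason = KVM_EXIT_NMI;
		r = RESUME_HOST;
		break;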

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_ppc.h      |  3 +-
 arch/powerpc/kvm/book3s.c               |  7 +++++
 arch/powerpc/kvm/book3s_hv.c            | 18 +++++++++--
 arch/powerpc/kvm/book3s_hv_ras.c        | 56 +++++++++------------------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 40 ++---------------------
 5 files changed, 42 insertions(+), 82 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b3bf4f6..d283d31 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -143,6 +143,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
 extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
 extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -646,7 +647,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
                             unsigned int yield_count);
 long kvmppc_h_random(struct kvm_vcpu *vcpu);
 void kvmhv_commence_exit(int trap);
-long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
 void kvmppc_subcore_enter_guest(void);
 void kvmppc_subcore_exit_guest(void);
 long kvmppc_realmode_hmi_handler(void);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 22a46c6..10c5579 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -195,6 +195,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
 }
 EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
 
+void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
+{
+	/* might as well deliver this straight away */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
+
 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 {
 	/* might as well deliver this straight away */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1860c0b..d8bf05a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1215,6 +1215,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		break;
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+		/* Print the MCE event to host console. */
+		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
+
+		/*
+		 * If the guest can do FWNMI, exit to userspace so it can
+		 * deliver a FWNMI to the guest.
+		 * Otherwise we synthesize a machine check for the guest
+		 * so that it knows that the machine check occurred.
+		 */
+		if (!vcpu->kvm->arch.fwnmi_enabled) {
+			ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
+			kvmppc_core_queue_machine_check(vcpu, flags);
+			r = RESUME_GUEST;
+			break;
+		}
+
 		/* Exit to guest with KVM_EXIT_NMI as exit reason */
 		run->exit_reason = KVM_EXIT_NMI;
 		run->hw.hardware_exit_reason = vcpu->arch.trap;
@@ -1227,8 +1243,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
 
 		r = RESUME_HOST;
-		/* Print the MCE event to host console. */
-		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
 		break;
 	case BOOK3S_INTERRUPT_PROGRAM:
 	{
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 0787f12..9aa10b1 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -69,7 +69,7 @@ static void reload_slb(struct kvm_vcpu *vcpu)
  *
  * Returns: 0 => exit guest, 1 => deliver machine check to guest
  */
-static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
+static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 {
 	unsigned long srr1 = vcpu->arch.shregs.msr;
 	struct machine_check_event mce_evt;
@@ -111,52 +111,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 	}
 
 	/*
-	 * See if we have already handled the condition in the linux host.
-	 * We assume that if the condition is recovered then linux host
-	 * will have generated an error log event that we will pick
-	 * up and log later.
-	 * Don't release mce event now. We will queue up the event so that
-	 * we can log the MCE event info on host console.
+	 * Now get the event and stash it in the vcpu struct so it can
+	 * be handled by the primary thread in virtual mode.  We can't
+	 * call machine_check_queue_event() here if we are running on
+	 * an offline secondary thread.
 	 */
-	if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
-		goto out;
-
-	if (mce_evt.version == MCE_V1 &&
-	    (mce_evt.severity == MCE_SEV_NO_ERROR ||
-	     mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
-		handled = 1;
-
-out:
-	/*
-	 * For guest that supports FWNMI capability, hook the MCE event into
-	 * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
-	 * exit reason. On our way to exit we will pull this event from vcpu
-	 * structure and print it from thread 0 of the core/subcore.
-	 *
-	 * For guest that does not support FWNMI capability (old QEMU):
-	 * We are now going enter guest either through machine check
-	 * interrupt (for unhandled errors) or will continue from
-	 * current HSRR0 (for handled errors) in guest. Hence
-	 * queue up the event so that we can log it from host console later.
-	 */
-	if (vcpu->kvm->arch.fwnmi_enabled) {
-		/*
-		 * Hook up the mce event on to vcpu structure.
-		 * First clear the old event.
-		 */
-		memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
-		if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
-			vcpu->arch.mce_evt = mce_evt;
-		}
-	} else
-		machine_check_queue_event();
+	if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+		if (handled && mce_evt.version == MCE_V1)
+			mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
+	} else {
+		memset(&mce_evt, 0, sizeof(mce_evt));
+	}
 
-	return handled;
+	vcpu->arch.mce_evt = mce_evt;
 }
 
-long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
-	return kvmppc_realmode_mc_power7(vcpu);
+	kvmppc_realmode_mc_power7(vcpu);
 }
 
 /* Check if dynamic split is in force and return subcore size accordingly. */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9b8d50a..f24f6a2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2826,49 +2826,15 @@ kvm_cede_exit:
 #endif /* CONFIG_KVM_XICS */
 3:	b	guest_exit_cont
 
-	/* Try to handle a machine check in real mode */
+	/* Try to do machine check recovery in real mode */
 machine_check_realmode:
 	mr	r3, r9		/* get vcpu pointer */
 	bl	kvmppc_realmode_machine_check
 	nop
+	/* all machine checks go to virtual mode for further handling */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-	/*
-	 * For the guest that is FWNMI capable, deliver all the MCE errors
-	 * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
-	 * reason. This new approach injects machine check errors in guest
-	 * address space to guest with additional information in the form
-	 * of RTAS event, thus enabling guest kernel to suitably handle
-	 * such errors.
-	 *
-	 * For the guest that is not FWNMI capable (old QEMU) fallback
-	 * to old behaviour for backward compatibility:
-	 * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
-	 * through machine check interrupt (set HSRR0 to 0x200).
-	 * For handled errors (no-fatal), just go back to guest execution
-	 * with current HSRR0.
-	 * if we receive machine check with MSR(RI=0) then deliver it to
-	 * guest as machine check causing guest to crash.
-	 */
-	ld	r11, VCPU_MSR(r9)
-	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
-	bne	guest_exit_cont		/* if so, exit to host */
-	/* Check if guest is capable of handling NMI exit */
-	ld	r10, VCPU_KVM(r9)
-	lbz	r10, KVM_FWNMI(r10)
-	cmpdi	r10, 1			/* FWNMI capable? */
-	beq	guest_exit_cont		/* if so, exit with KVM_EXIT_NMI. */
-
-	/* if not, fall through for backward compatibility. */
-	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
-	beq	1f			/* Deliver a machine check to guest */
-	ld	r10, VCPU_PC(r9)
-	cmpdi	r3, 0		/* Did we handle MCE ? */
-	bne	2f	/* Continue guest execution. */
-	/* If not, deliver a machine check.  SRR0/1 are already set */
-1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-	bl	kvmppc_msr_interrupt
-2:	b	fast_interrupt_c_return
+	b	guest_exit_cont
 
 /*
  * Call C code to handle a HMI in real mode.
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

+	 * an offline secondary thread.
 	 */
-	if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
-		goto out;
-
-	if (mce_evt.version == MCE_V1 &&
-	    (mce_evt.severity == MCE_SEV_NO_ERROR ||
-	     mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
-		handled = 1;
-
-out:
-	/*
-	 * For guest that supports FWNMI capability, hook the MCE event into
-	 * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
-	 * exit reason. On our way to exit we will pull this event from vcpu
-	 * structure and print it from thread 0 of the core/subcore.
-	 *
-	 * For guest that does not support FWNMI capability (old QEMU):
-	 * We are now going enter guest either through machine check
-	 * interrupt (for unhandled errors) or will continue from
-	 * current HSRR0 (for handled errors) in guest. Hence
-	 * queue up the event so that we can log it from host console later.
-	 */
-	if (vcpu->kvm->arch.fwnmi_enabled) {
-		/*
-		 * Hook up the mce event on to vcpu structure.
-		 * First clear the old event.
-		 */
-		memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
-		if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
-			vcpu->arch.mce_evt = mce_evt;
-		}
-	} else
-		machine_check_queue_event();
+	if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+	if (handled && mce_evt.version == MCE_V1)
+			mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
+	} else {
+		memset(&mce_evt, 0, sizeof(mce_evt));
+	}
 
-	return handled;
+	vcpu->arch.mce_evt = mce_evt;
 }
 
-long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
-	return kvmppc_realmode_mc_power7(vcpu);
+	kvmppc_realmode_mc_power7(vcpu);
 }
 
 /* Check if dynamic split is in force and return subcore size accordingly. */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9b8d50a..f24f6a2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2826,49 +2826,15 @@ kvm_cede_exit:
 #endif /* CONFIG_KVM_XICS */
 3:	b	guest_exit_cont
 
-	/* Try to handle a machine check in real mode */
+	/* Try to do machine check recovery in real mode */
 machine_check_realmode:
 	mr	r3, r9		/* get vcpu pointer */
 	bl	kvmppc_realmode_machine_check
 	nop
+	/* all machine checks go to virtual mode for further handling */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-	/*
-	 * For the guest that is FWNMI capable, deliver all the MCE errors
-	 * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
-	 * reason. This new approach injects machine check errors in guest
-	 * address space to guest with additional information in the form
-	 * of RTAS event, thus enabling guest kernel to suitably handle
-	 * such errors.
-	 *
-	 * For the guest that is not FWNMI capable (old QEMU) fallback
-	 * to old behaviour for backward compatibility:
-	 * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
-	 * through machine check interrupt (set HSRR0 to 0x200).
-	 * For handled errors (no-fatal), just go back to guest execution
-	 * with current HSRR0.
-	 * if we receive machine check with MSR(RI=0) then deliver it to
-	 * guest as machine check causing guest to crash.
-	 */
-	ld	r11, VCPU_MSR(r9)
-	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
-	bne	guest_exit_cont		/* if so, exit to host */
-	/* Check if guest is capable of handling NMI exit */
-	ld	r10, VCPU_KVM(r9)
-	lbz	r10, KVM_FWNMI(r10)
-	cmpdi	r10, 1			/* FWNMI capable? */
-	beq	guest_exit_cont		/* if so, exit with KVM_EXIT_NMI. */
-
-	/* if not, fall through for backward compatibility. */
-	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
-	beq	1f			/* Deliver a machine check to guest */
-	ld	r10, VCPU_PC(r9)
-	cmpdi	r3, 0		/* Did we handle MCE ? */
-	bne	2f	/* Continue guest execution. */
-	/* If not, deliver a machine check.  SRR0/1 are already set */
-1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-	bl	kvmppc_msr_interrupt
-2:	b	fast_interrupt_c_return
+	b	guest_exit_cont
 
 /*
  * Call C code to handle a HMI in real mode.
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/2] powerpc/64s: Better printing of machine check info for guest MCEs
  2019-02-20  1:05 ` Paul Mackerras
@ 2019-02-20  1:06   ` Paul Mackerras
  -1 siblings, 0 replies; 20+ messages in thread
From: Paul Mackerras @ 2019-02-20  1:06 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc

This adds an "in_guest" parameter to machine_check_print_event_info()
so that we can avoid trying to translate guest NIP values into
symbolic form using the host kernel's symbol table.
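
In sketch form, the (user_mode, in_guest) combinations at the call
sites touched below are:

	machine_check_print_event_info(evt, false, false);            /* host MCE queue  */
	machine_check_print_event_info(evt, false, true);             /* guest MCE (KVM) */
	machine_check_print_event_info(evt, user_mode(regs), false);  /* OPAL handler    */

Only the !in_guest cases can usefully resolve SRR0 against the host
symbol table; a guest NIP is printed as a bare address.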

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/mce.h        | 2 +-
 arch/powerpc/kernel/mce.c             | 8 +++++---
 arch/powerpc/kvm/book3s_hv.c          | 4 ++--
 arch/powerpc/platforms/powernv/opal.c | 2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index a8b8903..17996bc 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -209,7 +209,7 @@ extern int get_mce_event(struct machine_check_event *mce, bool release);
 extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt,
-					   bool user_mode);
+					   bool user_mode, bool in_guest);
 #ifdef CONFIG_PPC_BOOK3S_64
 void flush_and_reload_slb(void);
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index bd933a7..d01b690 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -301,13 +301,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
 	while (__this_cpu_read(mce_queue_count) > 0) {
 		index = __this_cpu_read(mce_queue_count) - 1;
 		evt = this_cpu_ptr(&mce_event_queue[index]);
-		machine_check_print_event_info(evt, false);
+		machine_check_print_event_info(evt, false, false);
 		__this_cpu_dec(mce_queue_count);
 	}
 }
 
 void machine_check_print_event_info(struct machine_check_event *evt,
-				    bool user_mode)
+				    bool user_mode, bool in_guest)
 {
 	const char *level, *sevstr, *subtype;
 	static const char *mc_ue_types[] = {
@@ -387,7 +387,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
 	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
 	       "Recovered" : "Not recovered");
 
-	if (user_mode) {
+	if (in_guest) {
+		printk("%s  Guest NIP: %016llx\n", evt->srr0);
+	} else if (user_mode) {
 		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
 			evt->srr0, current->pid, current->comm);
 	} else {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d8bf05a..81cba4b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1216,7 +1216,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
 		/* Print the MCE event to host console. */
-		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
+		machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
 
 		/*
 		 * If the guest can do FWNMI, exit to userspace so it can
@@ -1406,7 +1406,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		/* Pass the machine check to the L1 guest */
 		r = RESUME_HOST;
 		/* Print the MCE event to host console. */
-		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
+		machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
 		break;
 	/*
 	 * We get these next two if the guest accesses a page which it thinks
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 79586f1..05c85be 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -587,7 +587,7 @@ int opal_machine_check(struct pt_regs *regs)
 		       evt.version);
 		return 0;
 	}
-	machine_check_print_event_info(&evt, user_mode(regs));
+	machine_check_print_event_info(&evt, user_mode(regs), false);
 
 	if (opal_recover_mce(regs, &evt))
 		return 1;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] KVM: PPC: Book3S HV: Simplify machine check handling
  2019-02-20  1:05 ` Paul Mackerras
@ 2019-02-20  9:32   ` Aravinda Prasad
  -1 siblings, 0 replies; 20+ messages in thread
From: Aravinda Prasad @ 2019-02-20  9:20 UTC (permalink / raw)
  To: Paul Mackerras, linuxppc-dev, kvm, kvm-ppc



On Wednesday 20 February 2019 06:35 AM, Paul Mackerras wrote:
> This makes the handling of machine check interrupts that occur inside
> a guest simpler and more robust, with less done in assembler code and
> in real mode.
> 
> Now, when a machine check occurs inside a guest, we always get the
> machine check event struct and put a copy in the vcpu struct for the
> vcpu where the machine check occurred.  We no longer call
> machine_check_queue_event() from kvmppc_realmode_mc_power7(), because
> on POWER8, when a vcpu is running on an offline secondary thread and
> we call machine_check_queue_event(), that calls irq_work_queue(),
> which doesn't work because the CPU is offline, but instead triggers
> the WARN_ON(lazy_irq_pending()) in pnv_smp_cpu_kill_self() (which
> fires again and again because nothing clears the condition).
> 
> All that machine_check_queue_event() actually does is to cause the
> event to be printed to the console.  For a machine check occurring in
> the guest, we now print the event in kvmppc_handle_exit_hv()
> instead.
> 
> The assembly code at label machine_check_realmode now just calls C
> code and then continues exiting the guest.  We no longer either
> synthesize a machine check for the guest in assembly code or return
> to the guest without a machine check.
> 
> The code in kvmppc_handle_exit_hv() is extended to handle the case
> where the guest is not FWNMI-capable.  In that case we now always
> synthesize a machine check interrupt for the guest.  Previously, if
> the host thought it had recovered the machine check fully, it would
> return to the guest without any notification that the machine check
> had occurred.  If the machine check was caused by some action of the
> guest (such as creating duplicate SLB entries), it is much better to
> tell the guest that it has caused a problem.  Therefore we now always
> generate a machine check interrupt for guests that are not
> FWNMI-capable.
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> ---
>  arch/powerpc/include/asm/kvm_ppc.h      |  3 +-
>  arch/powerpc/kvm/book3s.c               |  7 +++++
>  arch/powerpc/kvm/book3s_hv.c            | 18 +++++++++--
>  arch/powerpc/kvm/book3s_hv_ras.c        | 56 +++++++++------------------------
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 40 ++---------------------
>  5 files changed, 42 insertions(+), 82 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index b3bf4f6..d283d31 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -143,6 +143,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
> 
>  extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
>  extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
> +extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
>  extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
>  extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
>  extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
> @@ -646,7 +647,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
>                              unsigned int yield_count);
>  long kvmppc_h_random(struct kvm_vcpu *vcpu);
>  void kvmhv_commence_exit(int trap);
> -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
> +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
>  void kvmppc_subcore_enter_guest(void);
>  void kvmppc_subcore_exit_guest(void);
>  long kvmppc_realmode_hmi_handler(void);
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 22a46c6..10c5579 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -195,6 +195,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
>  }
>  EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
> 
> +void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
> +{
> +	/* might as well deliver this straight away */
> +	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags);
> +}
> +EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
> +
>  void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
>  {
>  	/* might as well deliver this straight away */
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1860c0b..d8bf05a 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1215,6 +1215,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  		r = RESUME_GUEST;
>  		break;
>  	case BOOK3S_INTERRUPT_MACHINE_CHECK:
> +		/* Print the MCE event to host console. */
> +		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
> +
> +		/*
> +		 * If the guest can do FWNMI, exit to userspace so it can
> +		 * deliver a FWNMI to the guest.
> +		 * Otherwise we synthesize a machine check for the guest
> +		 * so that it knows that the machine check occurred.
> +		 */
> +		if (!vcpu->kvm->arch.fwnmi_enabled) {
> +			ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
> +			kvmppc_core_queue_machine_check(vcpu, flags);
> +			r = RESUME_GUEST;
> +			break;
> +		}
> +
>  		/* Exit to guest with KVM_EXIT_NMI as exit reason */
>  		run->exit_reason = KVM_EXIT_NMI;
>  		run->hw.hardware_exit_reason = vcpu->arch.trap;
> @@ -1227,8 +1243,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  			run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
> 
>  		r = RESUME_HOST;
> -		/* Print the MCE event to host console. */
> -		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
>  		break;
>  	case BOOK3S_INTERRUPT_PROGRAM:
>  	{
> diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
> index 0787f12..9aa10b1 100644
> --- a/arch/powerpc/kvm/book3s_hv_ras.c
> +++ b/arch/powerpc/kvm/book3s_hv_ras.c
> @@ -69,7 +69,7 @@ static void reload_slb(struct kvm_vcpu *vcpu)
>   *
>   * Returns: 0 => exit guest, 1 => deliver machine check to guest

You have to remove the above comment as the function now returns nothing.

Reviewed-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>

Regards,
Aravinda

>   */
> -static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
> +static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
>  {
>  	unsigned long srr1 = vcpu->arch.shregs.msr;
>  	struct machine_check_event mce_evt;
> @@ -111,52 +111,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
>  	}
> 
>  	/*
> -	 * See if we have already handled the condition in the linux host.
> -	 * We assume that if the condition is recovered then linux host
> -	 * will have generated an error log event that we will pick
> -	 * up and log later.
> -	 * Don't release mce event now. We will queue up the event so that
> -	 * we can log the MCE event info on host console.
> +	 * Now get the event and stash it in the vcpu struct so it can
> +	 * be handled by the primary thread in virtual mode.  We can't
> +	 * call machine_check_queue_event() here if we are running on
> +	 * an offline secondary thread.
>  	 */
> -	if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
> -		goto out;
> -
> -	if (mce_evt.version == MCE_V1 &&
> -	    (mce_evt.severity == MCE_SEV_NO_ERROR ||
> -	     mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
> -		handled = 1;
> -
> -out:
> -	/*
> -	 * For guest that supports FWNMI capability, hook the MCE event into
> -	 * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
> -	 * exit reason. On our way to exit we will pull this event from vcpu
> -	 * structure and print it from thread 0 of the core/subcore.
> -	 *
> -	 * For guest that does not support FWNMI capability (old QEMU):
> -	 * We are now going enter guest either through machine check
> -	 * interrupt (for unhandled errors) or will continue from
> -	 * current HSRR0 (for handled errors) in guest. Hence
> -	 * queue up the event so that we can log it from host console later.
> -	 */
> -	if (vcpu->kvm->arch.fwnmi_enabled) {
> -		/*
> -		 * Hook up the mce event on to vcpu structure.
> -		 * First clear the old event.
> -		 */
> -		memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
> -		if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
> -			vcpu->arch.mce_evt = mce_evt;
> -		}
> -	} else
> -		machine_check_queue_event();
> +	if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
> +		if (handled && mce_evt.version == MCE_V1)
> +			mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
> +	} else {
> +		memset(&mce_evt, 0, sizeof(mce_evt));
> +	}
> 
> -	return handled;
> +	vcpu->arch.mce_evt = mce_evt;
>  }
> 
> -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
> +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
>  {
> -	return kvmppc_realmode_mc_power7(vcpu);
> +	kvmppc_realmode_mc_power7(vcpu);
>  }
> 
>  /* Check if dynamic split is in force and return subcore size accordingly. */
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 9b8d50a..f24f6a2 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -2826,49 +2826,15 @@ kvm_cede_exit:
>  #endif /* CONFIG_KVM_XICS */
>  3:	b	guest_exit_cont
> 
> -	/* Try to handle a machine check in real mode */
> +	/* Try to do machine check recovery in real mode */
>  machine_check_realmode:
>  	mr	r3, r9		/* get vcpu pointer */
>  	bl	kvmppc_realmode_machine_check
>  	nop
> +	/* all machine checks go to virtual mode for further handling */
>  	ld	r9, HSTATE_KVM_VCPU(r13)
>  	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
> -	/*
> -	 * For the guest that is FWNMI capable, deliver all the MCE errors
> -	 * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
> -	 * reason. This new approach injects machine check errors in guest
> -	 * address space to guest with additional information in the form
> -	 * of RTAS event, thus enabling guest kernel to suitably handle
> -	 * such errors.
> -	 *
> -	 * For the guest that is not FWNMI capable (old QEMU) fallback
> -	 * to old behaviour for backward compatibility:
> -	 * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
> -	 * through machine check interrupt (set HSRR0 to 0x200).
> -	 * For handled errors (no-fatal), just go back to guest execution
> -	 * with current HSRR0.
> -	 * if we receive machine check with MSR(RI=0) then deliver it to
> -	 * guest as machine check causing guest to crash.
> -	 */
> -	ld	r11, VCPU_MSR(r9)
> -	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
> -	bne	guest_exit_cont		/* if so, exit to host */
> -	/* Check if guest is capable of handling NMI exit */
> -	ld	r10, VCPU_KVM(r9)
> -	lbz	r10, KVM_FWNMI(r10)
> -	cmpdi	r10, 1			/* FWNMI capable? */
> -	beq	guest_exit_cont		/* if so, exit with KVM_EXIT_NMI. */
> -
> -	/* if not, fall through for backward compatibility. */
> -	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
> -	beq	1f			/* Deliver a machine check to guest */
> -	ld	r10, VCPU_PC(r9)
> -	cmpdi	r3, 0		/* Did we handle MCE ? */
> -	bne	2f	/* Continue guest execution. */
> -	/* If not, deliver a machine check.  SRR0/1 are already set */
> -1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
> -	bl	kvmppc_msr_interrupt
> -2:	b	fast_interrupt_c_return
> +	b	guest_exit_cont
> 
>  /*
>   * Call C code to handle a HMI in real mode.
> 

-- 
Regards,
Aravinda

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] powerpc/64s: Better printing of machine check info for guest MCEs
  2019-02-20  1:06   ` Paul Mackerras
@ 2019-02-20  9:33     ` Aravinda Prasad
  -1 siblings, 0 replies; 20+ messages in thread
From: Aravinda Prasad @ 2019-02-20  9:21 UTC (permalink / raw)
  To: Paul Mackerras, linuxppc-dev, kvm, kvm-ppc



On Wednesday 20 February 2019 06:36 AM, Paul Mackerras wrote:
> This adds an "in_guest" parameter to machine_check_print_event_info()
> so that we can avoid trying to translate guest NIP values into
> symbolic form using the host kernel's symbol table.

Reviewed-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>

Regards,
Aravinda

> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> ---
>  arch/powerpc/include/asm/mce.h        | 2 +-
>  arch/powerpc/kernel/mce.c             | 8 +++++---
>  arch/powerpc/kvm/book3s_hv.c          | 4 ++--
>  arch/powerpc/platforms/powernv/opal.c | 2 +-
>  4 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index a8b8903..17996bc 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -209,7 +209,7 @@ extern int get_mce_event(struct machine_check_event *mce, bool release);
>  extern void release_mce_event(void);
>  extern void machine_check_queue_event(void);
>  extern void machine_check_print_event_info(struct machine_check_event *evt,
> -					   bool user_mode);
> +					   bool user_mode, bool in_guest);
>  #ifdef CONFIG_PPC_BOOK3S_64
>  void flush_and_reload_slb(void);
>  #endif /* CONFIG_PPC_BOOK3S_64 */
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index bd933a7..d01b690 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -301,13 +301,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
>  	while (__this_cpu_read(mce_queue_count) > 0) {
>  		index = __this_cpu_read(mce_queue_count) - 1;
>  		evt = this_cpu_ptr(&mce_event_queue[index]);
> -		machine_check_print_event_info(evt, false);
> +		machine_check_print_event_info(evt, false, false);
>  		__this_cpu_dec(mce_queue_count);
>  	}
>  }
> 
>  void machine_check_print_event_info(struct machine_check_event *evt,
> -				    bool user_mode)
> +				    bool user_mode, bool in_guest)
>  {
>  	const char *level, *sevstr, *subtype;
>  	static const char *mc_ue_types[] = {
> @@ -387,7 +387,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
>  	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
>  	       "Recovered" : "Not recovered");
> 
> -	if (user_mode) {
> +	if (in_guest) {
> +		printk("%s  Guest NIP: %016llx\n", evt->srr0);
> +	} else if (user_mode) {
>  		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
>  			evt->srr0, current->pid, current->comm);
>  	} else {
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index d8bf05a..81cba4b 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1216,7 +1216,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  		break;
>  	case BOOK3S_INTERRUPT_MACHINE_CHECK:
>  		/* Print the MCE event to host console. */
> -		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
> +		machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
> 
>  		/*
>  		 * If the guest can do FWNMI, exit to userspace so it can
> @@ -1406,7 +1406,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
>  		/* Pass the machine check to the L1 guest */
>  		r = RESUME_HOST;
>  		/* Print the MCE event to host console. */
> -		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
> +		machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
>  		break;
>  	/*
>  	 * We get these next two if the guest accesses a page which it thinks
> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
> index 79586f1..05c85be 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -587,7 +587,7 @@ int opal_machine_check(struct pt_regs *regs)
>  		       evt.version);
>  		return 0;
>  	}
> -	machine_check_print_event_info(&evt, user_mode(regs));
> +	machine_check_print_event_info(&evt, user_mode(regs), false);
> 
>  	if (opal_recover_mce(regs, &evt))
>  		return 1;
> 

-- 
Regards,
Aravinda

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] powerpc/64s: Better printing of machine check info for guest MCEs
  2019-02-20  1:06   ` Paul Mackerras
@ 2019-02-20  9:47     ` Mahesh J Salgaonkar
  -1 siblings, 0 replies; 20+ messages in thread
From: Mahesh J Salgaonkar @ 2019-02-20  9:35 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, kvm-ppc, kvm

On 2019-02-20 12:06:23 Wed, Paul Mackerras wrote:
> This adds an "in_guest" parameter to machine_check_print_event_info()
> so that we can avoid trying to translate guest NIP values into
> symbolic form using the host kernel's symbol table.
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> ---
>  arch/powerpc/include/asm/mce.h        | 2 +-
>  arch/powerpc/kernel/mce.c             | 8 +++++---
>  arch/powerpc/kvm/book3s_hv.c          | 4 ++--
>  arch/powerpc/platforms/powernv/opal.c | 2 +-
>  4 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index a8b8903..17996bc 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -209,7 +209,7 @@ extern int get_mce_event(struct machine_check_event *mce, bool release);
>  extern void release_mce_event(void);
>  extern void machine_check_queue_event(void);
>  extern void machine_check_print_event_info(struct machine_check_event *evt,
> -					   bool user_mode);
> +					   bool user_mode, bool in_guest);
>  #ifdef CONFIG_PPC_BOOK3S_64
>  void flush_and_reload_slb(void);
>  #endif /* CONFIG_PPC_BOOK3S_64 */
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index bd933a7..d01b690 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -301,13 +301,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
>  	while (__this_cpu_read(mce_queue_count) > 0) {
>  		index = __this_cpu_read(mce_queue_count) - 1;
>  		evt = this_cpu_ptr(&mce_event_queue[index]);
> -		machine_check_print_event_info(evt, false);
> +		machine_check_print_event_info(evt, false, false);
>  		__this_cpu_dec(mce_queue_count);
>  	}
>  }
> 
>  void machine_check_print_event_info(struct machine_check_event *evt,
> -				    bool user_mode)
> +				    bool user_mode, bool in_guest)
>  {
>  	const char *level, *sevstr, *subtype;
>  	static const char *mc_ue_types[] = {
> @@ -387,7 +387,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
>  	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
>  	       "Recovered" : "Not recovered");
> 
> -	if (user_mode) {
> +	if (in_guest) {
> +		printk("%s  Guest NIP: %016llx\n", evt->srr0);

Missing 'level' argument. This should be:
		printk("%s  Guest NIP: %016llx\n", level, evt->srr0);

Rest looks fine to me.

Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
-Mahesh.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] KVM: PPC: Book3S HV: Simplify machine check handling
  2019-02-20  1:05 ` Paul Mackerras
@ 2019-02-20  9:55   ` Mahesh J Salgaonkar
  -1 siblings, 0 replies; 20+ messages in thread
From: Mahesh J Salgaonkar @ 2019-02-20  9:43 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, kvm-ppc, kvm

On 2019-02-20 12:05:50 Wed, Paul Mackerras wrote:
> This makes the handling of machine check interrupts that occur inside
> a guest simpler and more robust, with less done in assembler code and
> in real mode.
> 
> Now, when a machine check occurs inside a guest, we always get the
> machine check event struct and put a copy in the vcpu struct for the
> vcpu where the machine check occurred.  We no longer call
> machine_check_queue_event() from kvmppc_realmode_mc_power7(), because
> on POWER8, when a vcpu is running on an offline secondary thread and
> we call machine_check_queue_event(), that calls irq_work_queue(),
> which doesn't work because the CPU is offline, but instead triggers
> the WARN_ON(lazy_irq_pending()) in pnv_smp_cpu_kill_self() (which
> fires again and again because nothing clears the condition).
> 
> All that machine_check_queue_event() actually does is to cause the
> event to be printed to the console.  For a machine check occurring in
> the guest, we now print the event in kvmppc_handle_exit_hv()
> instead.
> 
> The assembly code at label machine_check_realmode now just calls C
> code and then continues exiting the guest.  We no longer either
> synthesize a machine check for the guest in assembly code or return
> to the guest without a machine check.
> 
> The code in kvmppc_handle_exit_hv() is extended to handle the case
> where the guest is not FWNMI-capable.  In that case we now always
> synthesize a machine check interrupt for the guest.  Previously, if
> the host thought it had recovered the machine check fully, it would
> return to the guest without any notification that the machine check
> had occurred.  If the machine check was caused by some action of the
> guest (such as creating duplicate SLB entries), it is much better to
> tell the guest that it has caused a problem.  Therefore we now always
> generate a machine check interrupt for guests that are not
> FWNMI-capable.

Looks good to me.

Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
-Mahesh.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2019-02-20  9:55 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-27  6:07 [PATCH 1/2] KVM: PPC: Book3S HV: Simplify dynamic micro-threading code Paul Mackerras
2017-06-27  6:07 ` Paul Mackerras
2017-06-27  6:09 ` [PATCH 2/2] KVM: PPC: Book3S HV: Close race with testing for signals on guest entry Paul Mackerras
2017-06-27  6:09   ` Paul Mackerras
2017-06-30  7:03   ` [PATCH v2 " Paul Mackerras
2017-06-30  7:03     ` Paul Mackerras
2017-06-30  4:54 ` [PATCH 1/2] KVM: PPC: Book3S HV: Simplify dynamic micro-threading code David Gibson
2017-06-30  4:54   ` David Gibson
2019-02-20  1:05 [PATCH 1/2] KVM: PPC: Book3S HV: Simplify machine check handling Paul Mackerras
2019-02-20  1:05 ` Paul Mackerras
2019-02-20  1:06 ` [PATCH 2/2] powerpc/64s: Better printing of machine check info for guest MCEs Paul Mackerras
2019-02-20  9:21   ` Aravinda Prasad
2019-02-20  9:35   ` Mahesh J Salgaonkar
2019-02-20  9:20 ` [PATCH 1/2] KVM: PPC: Book3S HV: Simplify machine check handling Aravinda Prasad
2019-02-20  9:43 ` Mahesh J Salgaonkar
