* [PATCH v2 01/11] powerpc/kvm/xive: Add more debugfs queues info
@ 2017-11-23  4:36 ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

Add details about enabled queues and escalation interrupts

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kvm/book3s_xive.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..6cff5bdfd6b7 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1794,6 +1794,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+		unsigned int i;
 
 		if (!xc)
 			continue;
@@ -1803,6 +1804,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
 			   xc->server_num, xc->cppr, xc->hw_cppr,
 			   xc->mfrr, xc->pending,
 			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+		for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+			struct xive_q *q = &xc->queues[i];
+			u32 i0, i1, idx;
+
+			if (!q->qpage && !xc->esc_virq[i])
+				continue;
+
+			seq_printf(m, " [q%d]: ", i);
+
+			if (q->qpage) {
+				idx = q->idx;
+				i0 = be32_to_cpup(q->qpage + idx);
+				idx = (idx + 1) & q->msk;
+				i1 = be32_to_cpup(q->qpage + idx);
+				seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
+			}
+			if (xc->esc_virq[i]) {
+				struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+				struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+				u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+				seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+					   (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+					   (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+					   xc->esc_virq[i], pq, xd->eoi_page);
+				seq_printf(m, "\n");
+			}
+		}
 
 		t_rm_h_xirr += xc->stat_rm_h_xirr;
 		t_rm_h_ipoll += xc->stat_rm_h_ipoll;
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
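
For illustration, the debugfs loop above peeks at the next two queue
entries without consuming them. Below is a minimal stand-alone C sketch
of the same logic; the reduced struct (mirroring the struct xive_q
fields the patch touches) and ntohl() standing in for be32_to_cpup()
are assumptions made for brevity:

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>   /* ntohl(): big-endian to host, as be32_to_cpup() */

    struct q_peek {          /* reduced stand-in for struct xive_q */
            uint32_t *qpage; /* event queue page, big-endian entries */
            uint32_t idx;    /* next entry to be consumed */
            uint32_t msk;    /* queue size - 1, for wrap-around */
            int toggle;      /* generation (valid) bit */
    };

    /* Print " [qN]: T=... xxxxxxxx yyyyyyyy..." like the patch does,
     * leaving q->idx untouched so the peek is non-destructive. */
    static void peek_queue(int prio, const struct q_peek *q)
    {
            uint32_t idx = q->idx;
            uint32_t e0 = ntohl(q->qpage[idx]);

            idx = (idx + 1) & q->msk;        /* wrap at queue size */
            uint32_t e1 = ntohl(q->qpage[idx]);
            printf(" [q%d]: T=%d %08x %08x...\n", prio, q->toggle, e0, e1);
    }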

* [PATCH v2 02/11] powerpc/kvm/xive: Enable use of the new "single escalation" feature
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

That feature, provided by Power9 DD2.0 and later, when supported
by newer OPAL versions, allows us to sacrifice a queue (priority 7)
in favor of merging all the escalation interrupts of the queues
of a single VP into a single interrupt.

This reduces the number of host interrupts used up by KVM guests
especially when those guests use multiple priorities.

It will also enable a future change to control the masking of the
escalation interrupts more precisely to avoid spurious ones.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/opal-api.h |  1 +
 arch/powerpc/include/asm/xive.h     |  3 ++-
 arch/powerpc/kvm/book3s_xive.c      | 48 ++++++++++++++++++++++++-------------
 arch/powerpc/kvm/book3s_xive.h      | 15 +++++-------
 arch/powerpc/sysdev/xive/native.c   | 18 ++++++++++++--
 5 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 450a60b81d2a..4df668a32ab4 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1070,6 +1070,7 @@ enum {
 /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
 enum {
 	OPAL_XIVE_VP_ENABLED		= 0x00000001,
+	OPAL_XIVE_VP_SINGLE_ESCALATION	= 0x00000002,
 };
 
 /* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..11d5edeb5c22 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -143,9 +143,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
 
 extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
 extern int xive_native_disable_vp(u32 vp_id);
 extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
 
 #else
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6cff5bdfd6b7..a102efeabf05 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
 		return -EIO;
 	}
 
-	/*
-	 * Future improvement: start with them disabled
-	 * and handle DD2 and later scheme of merged escalation
-	 * interrupts
-	 */
-	name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-			 vcpu->kvm->arch.lpid, xc->server_num, prio);
+	if (xc->xive->single_escalation)
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num);
+	else
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num, prio);
 	if (!name) {
 		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
 		       prio, xc->server_num);
 		rc = -ENOMEM;
 		goto error;
 	}
+
+	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
 	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
 			 IRQF_NO_THREAD, name, vcpu);
 	if (rc) {
@@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 
 	pr_devel("Provisioning prio... %d\n", prio);
 
-	/* Provision each VCPU and enable escalations */
+	/* Provision each VCPU and enable escalations if needed */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!vcpu->arch.xive_vcpu)
 			continue;
 		rc = xive_provision_queue(vcpu, prio);
-		if (rc == 0)
+		if (rc == 0 && !xive->single_escalation)
 			xive_attach_escalation(vcpu, prio);
 		if (rc)
 			return rc;
@@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	/* Allocate IPI */
 	xc->vp_ipi = xive_native_alloc_irq();
 	if (!xc->vp_ipi) {
+		pr_err("Failed to allocate xive irq for VCPU IPI\n");
 		r = -EIO;
 		goto bail;
 	}
@@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	if (r)
 		goto bail;
 
+	/*
+	 * Enable the VP first as the single escalation mode will
+	 * affect escalation interrupts numbering
+	 */
+	r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+	if (r) {
+		pr_err("Failed to enable VP in OPAL, err %d\n", r);
+		goto bail;
+	}
+
 	/*
 	 * Initialize queues. Initially we set them all for no queueing
 	 * and we enable escalation for queue 0 only which we'll use for
 	 * our mfrr change notifications. If the VCPU is hot-plugged, we
-	 * do handle provisioning however.
+	 * do handle provisioning however based on the existing "map"
+	 * of enabled queues.
 	 */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		struct xive_q *q = &xc->queues[i];
 
+		/* Single escalation, no queue 7 */
+		if (i == 7 && xive->single_escalation)
+			break;
+
 		/* Is queue already enabled ? Provision it */
 		if (xive->qmap & (1 << i)) {
 			r = xive_provision_queue(vcpu, i);
-			if (r == 0)
+			if (r == 0 && !xive->single_escalation)
 				xive_attach_escalation(vcpu, i);
 			if (r)
 				goto bail;
@@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	if (r)
 		goto bail;
 
-	/* Enable the VP */
-	r = xive_native_enable_vp(xc->vp_id);
-	if (r)
-		goto bail;
-
 	/* Route the IPI */
 	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
 	if (!r)
@@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
 	pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
 		 val, server, guest_prio);
+
 	/*
 	 * If the source doesn't already have an IPI, allocate
 	 * one and get the corresponding data
@@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 	if (xive->vp_base == XIVE_INVALID_VP)
 		ret = -ENOMEM;
 
+	xive->single_escalation = xive_native_has_single_escalation();
+
 	if (ret) {
 		kfree(xive);
 		return ret;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 6ba63f8e8a61..a08ae6fd4c51 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -120,6 +120,8 @@ struct kvmppc_xive {
 	u32	q_order;
 	u32	q_page_order;
 
+	/* Flags */
+	u8	single_escalation;
 };
 
 #define KVMPPC_XIVE_Q_COUNT	8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
  * is as follow.
  *
  * Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
  *
  * Similar mapping is done for CPPR values
  */
 static inline u8 xive_prio_from_guest(u8 prio)
 {
-	if (prio == 0xff || prio < 8)
+	if (prio == 0xff || prio < 6)
 		return prio;
-	return 7;
+	return 6;
 }
 
 static inline u8 xive_prio_to_guest(u8 prio)
 {
-	if (prio == 0xff || prio < 7)
-		return prio;
-	return 0xb;
+	return prio;
 }
 
 static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ebc244b08d67..d22aeb0b69e1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
 static u32 xive_queue_shift;
 static u32 xive_pool_vps = XIVE_INVALID_VP;
 static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
 
 int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 {
@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
 			break;
 	}
 
+	/* Do we support single escalation */
+	if (of_get_property(np, "single-escalation-support", NULL) != NULL)
+		xive_has_single_esc = true;
+
 	/* Configure Thread Management areas for KVM */
 	for_each_possible_cpu(cpu)
 		kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
 }
 EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
 
-int xive_native_enable_vp(u32 vp_id)
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
 {
 	s64 rc;
+	u64 flags = OPAL_XIVE_VP_ENABLED;
 
+	if (single_escalation)
+		flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
 	for (;;) {
-		rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+		rc = opal_xive_set_vp_info(vp_id, flags, 0);
 		if (rc != OPAL_BUSY)
 			break;
 		msleep(1);
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+	return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
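
The key ordering constraint this patch introduces is that the VP must
be enabled (with the single escalation flag) before any escalation
interrupt is allocated, since the mode changes escalation interrupt
numbering. A sketch of the resulting call sequence, reusing the names
from the diff, with error handling trimmed:

    /* sketch only: context (xive, xc) as in kvmppc_xive_connect_vcpu() */
    xive->single_escalation = xive_native_has_single_escalation();

    /* OPAL_XIVE_VP_SINGLE_ESCALATION renumbers this VP's escalations */
    r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
    if (r)
            goto bail;

    for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
            /* queue 7 is sacrificed to host the merged escalation */
            if (i == 7 && xive->single_escalation)
                    break;
            /* provision queue i; per-queue escalations are only
             * attached when single escalation is not in use */
    }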

* [PATCH v2 03/11] powerpc/kvm/xive: Don't use existing "prodded" flag for xive escalations
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

The prodded flag is only cleared at the beginning of H_CEDE,
so every time we have an escalation, we will cause the *next*
H_CEDE to return immediately.

Instead use a dedicated "irq_pending" flag to indicate that
a guest interrupt is pending for the VCPU. We don't reuse the
existing exception bitmap so as to avoid expensive atomic ops.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/kvm_host.h     |  1 +
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_hv.c            |  2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10 ++++++++++
 arch/powerpc/kvm/book3s_xive.c          |  3 +--
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e372ed871c51..ba915ee24825 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -711,6 +711,7 @@ struct kvm_vcpu_arch {
 	u8 ceded;
 	u8 prodded;
 	u8 doorbell_request;
+	u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
 	u32 last_inst;
 
 	struct swait_queue_head *wqp;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8cfb20e38cfe..0dc911a1feac 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -514,6 +514,7 @@ int main(void)
 	OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
 	OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
 	OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+	OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
 	OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
 	OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
 	OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8d43cf205d34..4df5d198ea83 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2942,7 +2942,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
 {
 	if (!xive_enabled())
 		return false;
-	return vcpu->arch.xive_saved_state.pipr <
+	return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
 		vcpu->arch.xive_saved_state.cppr;
 }
 #else
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 42639fba89e8..e4aedd48d988 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -997,6 +997,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	li	r9, 1
 	stw	r9, VCPU_XIVE_PUSHED(r4)
 	eieio
+
+	/*
+	 * We clear the irq_pending flag. There is a small chance of a
+	 * race vs. the escalation interrupt happening on another
+	 * processor setting it again, but the only consequence is to
+	 * cause a spurious wakeup on the next H_CEDE which is not an
+	 * issue.
+	 */
+	li	r0,0
+	stb	r0, VCPU_IRQ_PENDING(r4);
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index a102efeabf05..eef9ccafdc09 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -84,8 +84,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 {
 	struct kvm_vcpu *vcpu = data;
 
-	/* We use the existing H_PROD mechanism to wake up the target */
-	vcpu->arch.prodded = 1;
+	vcpu->arch.irq_pending = 1;
 	smp_mb();
 	if (vcpu->arch.ceded)
 		kvmppc_fast_vcpu_kick(vcpu);
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
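
The handshake between the escalation handler and the cede path is the
usual store/barrier/load pairing; a sketch of the two sides using the
names from the diff (that the matching barrier on the cede side lives
in the existing real-mode entry code is an assumption here):

    /* waker side, xive_esc_irq(): publish the event, then sample */
    vcpu->arch.irq_pending = 1;
    smp_mb();                        /* order the store vs. the load below */
    if (vcpu->arch.ceded)
            kvmppc_fast_vcpu_kick(vcpu);

    /* sleeper side, xive_interrupt_pending(): either the waker saw
     * ceded == 1 and kicked us, or we see irq_pending == 1 here --
     * the barrier rules out missing both */
    pending = vcpu->arch.irq_pending ||
              vcpu->arch.xive_saved_state.pipr <
              vcpu->arch.xive_saved_state.cppr;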

* [PATCH v2 04/11] powerpc/kvm/xive: Check DR not IR to choose real vs virt mode MMIOs
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e4aedd48d988..39908e60401f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1406,7 +1406,7 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	li	r7, TM_SPC_PULL_OS_CTX
 	li	r6, TM_QW1_OS
 	mfmsr	r0
-	andi.	r0, r0, MSR_IR		/* in real mode? */
+	andi.	r0, r0, MSR_DR		/* in real mode? */
 	beq	2f
 	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
 	cmpldi	cr0, r10, 0
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
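
The patch has no commit body, so the rationale below is an editorial
reading: the guarded TIMA access is a load/store, and whether the
virtual or the real-mode (cache-inhibited) mapping is usable depends on
data relocation (MSR_DR), not instruction relocation (MSR_IR). In C
the test amounts to the following sketch, where tima_virt/tima_phys are
placeholder names for the virt/real TIMA addresses the asm loads
(HSTATE_XIVE_TIMA_VIRT and its real-mode counterpart):

    /* sketch, assuming hypervisor context where mfmsr() is available */
    if (mfmsr() & MSR_DR)
            tima = tima_virt;   /* data relocation on: use virt MMIO */
    else
            tima = tima_phys;   /* data accesses in real mode: use real MMIO */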

* [PATCH v2 05/11] powerpc/kvm/xive: Make xive_pushed a byte, not a word
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/kvm_host.h     | 3 ++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ba915ee24825..8a4e77273b07 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -741,7 +741,8 @@ struct kvm_vcpu_arch {
 	struct kvmppc_icp *icp; /* XICS presentation controller */
 	struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
 	__be32 xive_cam_word;    /* Cooked W2 in proper endian with valid bit */
-	u32 xive_pushed;	 /* Is the VP pushed on the physical CPU ? */
+	u8 xive_pushed;		 /* Is the VP pushed on the physical CPU ? */
+	u8 xive_esc_on;		 /* Is the escalation irq enabled ? */
 	union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 39908e60401f..286bcc4a73c2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -995,7 +995,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	li	r9, TM_QW1_OS + TM_WORD2
 	stwcix	r11,r9,r10
 	li	r9, 1
-	stw	r9, VCPU_XIVE_PUSHED(r4)
+	stb	r9, VCPU_XIVE_PUSHED(r4)
 	eieio
 
 	/*
@@ -1400,7 +1400,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 #ifdef CONFIG_KVM_XICS
 	/* We are exiting, pull the VP from the XIVE */
-	lwz	r0, VCPU_XIVE_PUSHED(r9)
+	lbz	r0, VCPU_XIVE_PUSHED(r9)
 	cmpwi	cr0, r0, 0
 	beq	1f
 	li	r7, TM_SPC_PULL_OS_CTX
@@ -1429,7 +1429,7 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Fixup some of the state for the next load */
 	li	r10, 0
 	li	r0, 0xff
-	stw	r10, VCPU_XIVE_PUSHED(r9)
+	stb	r10, VCPU_XIVE_PUSHED(r9)
 	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
 	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
 	eieio
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH v2 06/11] powerpc/xive: Move definition of ESB bits
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

From xive.h to xive-regs.h since it's a HW register definition
and it can be used from assembly

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/xive-regs.h | 35 +++++++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/xive.h      | 35 -----------------------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -9,6 +9,41 @@
 #ifndef _ASM_POWERPC_XIVE_REGS_H
 #define _ASM_POWERPC_XIVE_REGS_H
 
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI	0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
+#define XIVE_ESB_GET		0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P		0x2
+#define XIVE_ESB_VAL_Q		0x1
+
 /*
  * Thread Management (aka "TM") registers
  */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 11d5edeb5c22..f786617ea8b7 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -72,41 +72,6 @@ struct xive_q {
 	atomic_t		pending_count;
 };
 
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI	0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
-#define XIVE_ESB_GET		0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P		0x2
-#define XIVE_ESB_VAL_Q		0x1
-
 /* Global enable flags for the XIVE support */
 extern bool __xive_enabled;
 
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
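
The PQ semantics in the moved comment can be exercised with a small
sketch of a state query; XIVE_ESB_GET is one of the side-effect-free
"load" offsets, and the mapped esb_mmio base plus kernel context are
assumed:

    #include <asm/io.h>
    #include <asm/xive-regs.h>

    /* Read the current PQ bits of a source without changing them. */
    static u8 esb_read_pq(void __iomem *esb_mmio)
    {
            u64 pq = in_be64(esb_mmio + XIVE_ESB_GET);

            /* P: pending -- sent to a queue, waiting for an EOI.
             * Q: triggered again while pending, i.e. coalesced;
             *    the EOI will have to re-trigger the interrupt. */
            return pq & (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q);
    }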

* [PATCH v2 07/11] powerpc/xive: Add interrupt flag to disable automatic EOI
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

This will be used by KVM in order to keep escalation interrupts
in the non-EOI (masked) state after they fire. They will be
re-enabled directly in HW by KVM when needed.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/xive.h   | 3 +++
 arch/powerpc/sysdev/xive/common.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index f786617ea8b7..e602903c3029 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_EOI_FW	0x10
 #define XIVE_IRQ_FLAG_H_INT_ESB	0x20
 
+/* Special flag set by KVM for escalation interrupts */
+#define XIVE_IRQ_NO_EOI		0x80
+
 #define XIVE_INVALID_CHIP_ID	-1
 
 /* A queue tracking structure in a CPU */
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 838ebdbfe4c5..40c06110821c 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
 	 * EOI the source if it hasn't been disabled and hasn't
 	 * been passed-through to a KVM guest
 	 */
-	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+	    !(xd->flags & XIVE_IRQ_NO_EOI))
 		xive_do_source_eoi(irqd_to_hwirq(d), xd);
 
 	/*
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
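
A sketch of how a user of the new flag is expected to apply it (this is
what KVM does in a later patch of this series; the genirq accessors are
the standard ones already used elsewhere in the XIVE code):

    struct irq_data *d = irq_get_irq_data(virq);
    struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

    /* From now on xive_irq_eoi() skips the source EOI: once fired,
     * the interrupt stays effectively masked until something sets
     * its PQ bits back to 00 directly through the ESB. */
    xd->flags |= XIVE_IRQ_NO_EOI;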

* [PATCH v2 08/11] powerpc/kvm/xive: Keep escalation interrupt masked unless ceded
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

This works on top of the single escalation support. With this change,
when in single escalation mode, we keep the escalation interrupt
disabled unless the VCPU is in H_CEDE (idle). In any other case, we
know the VCPU will be rescheduled and thus there is no need to take
escalation interrupts in the host whenever a guest interrupt fires.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/kvm_host.h     |  2 ++
 arch/powerpc/kernel/asm-offsets.c       |  3 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 64 +++++++++++++++++++++++++++++++--
 arch/powerpc/kvm/book3s_xive.c          | 30 ++++++++++++++++
 4 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8a4e77273b07..e433fe2ce4b7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -744,6 +744,8 @@ struct kvm_vcpu_arch {
 	u8 xive_pushed;		 /* Is the VP pushed on the physical CPU ? */
 	u8 xive_esc_on;		 /* Is the escalation irq enabled ? */
 	union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+	u64 xive_esc_raddr;	 /* Escalation interrupt ESB real addr */
+	u64 xive_esc_vaddr;	 /* Escalation interrupt ESB virt addr */
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0dc911a1feac..eff521c67ec3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -731,6 +731,9 @@ int main(void)
 	DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
 					    arch.xive_cam_word));
 	DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+	DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+	DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+	DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 286bcc4a73c2..ec66b0e07f47 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1006,7 +1006,42 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	 * issue.
 	 */
 	li	r0,0
-	stb	r0, VCPU_IRQ_PENDING(r4);
+	stb	r0, VCPU_IRQ_PENDING(r4)
+
+	/*
+	 * In single escalation mode, if the escalation interrupt is
+	 * on, we mask it.
+	 */
+	lbz	r0, VCPU_XIVE_ESC_ON(r4)
+	cmpwi	r0,0
+	beq	1f
+	ld	r10, VCPU_XIVE_ESC_RADDR(r4)
+	li	r9, XIVE_ESB_SET_PQ_01
+	ldcix	r0, r10, r9
+	sync
+
+	/* We have a possible subtle race here: The escalation interrupt might
+	 * have fired and be on its way to the host queue while we mask it,
+	 * and if we unmask it early enough (re-cede right away), there is
+	 * a theoretical possibility that it fires again, thus landing in the
+	 * target queue more than once which is a big no-no.
+	 *
+	 * Fortunately, solving this is rather easy. If the above load setting
+	 * PQ to 01 returns a previous value where P is set, then we know the
+	 * escalation interrupt is somewhere on its way to the host. In that
+	 * case we simply don't clear the xive_esc_on flag below. It will be
+	 * eventually cleared by the handler for the escalation interrupt.
+	 *
+	 * Then, when doing a cede, we check that flag again before re-enabling
+	 * the escalation interrupt, and if set, we abort the cede.
+	 */
+	andi.	r0, r0, XIVE_ESB_VAL_P
+	bne-	1f
+
+	/* Now P is 0, we can clear the flag */
+	li	r0, 0
+	stb	r0, VCPU_XIVE_ESC_ON(r4)
+1:
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
@@ -2705,7 +2740,32 @@ kvm_cede_prodded:
 	/* we've ceded but we want to give control to the host */
 kvm_cede_exit:
 	ld	r9, HSTATE_KVM_VCPU(r13)
-	b	guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+	/* Abort if we still have a pending escalation */
+	lbz	r5, VCPU_XIVE_ESC_ON(r9)
+	cmpwi	r5, 0
+	beq	1f
+	li	r0, 0
+	stb	r0, VCPU_CEDED(r9)
+1:	/* Enable XIVE escalation */
+	li	r5, XIVE_ESB_SET_PQ_00
+	mfmsr	r0
+	andi.	r0, r0, MSR_DR		/* in real mode? */
+	beq	1f
+	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
+	cmpdi	r10, 0
+	beq	3f
+	ldx	r0, r10, r5
+	b	2f
+1:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
+	cmpdi	r10, 0
+	beq	3f
+	ldcix	r0, r10, r5
+2:	sync
+	li	r0, 1
+	stb	r0, VCPU_XIVE_ESC_ON(r9)
+#endif /* CONFIG_KVM_XICS */
+3:	b	guest_exit_cont
 
 	/* Try to handle a machine check in real mode */
 machine_check_realmode:
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index eef9ccafdc09..7a047bc88f11 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -89,6 +89,17 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 	if (vcpu->arch.ceded)
 		kvmppc_fast_vcpu_kick(vcpu);
 
+	/* Since we have the no-EOI flag, the interrupt is effectively
+	 * disabled now. Clearing xive_esc_on means we won't bother
+	 * doing so on the next entry.
+	 *
+	 * This also allows the entry code to know that if a PQ combination
+	 * of 10 is observed while xive_esc_on is true, it means the queue
+	 * contains an unprocessed escalation interrupt. We don't make use of
+	 * that knowledge today but might (see comment in book3s_hv_rmhandlers.S)
+	 */
+	vcpu->arch.xive_esc_on = false;
+
 	return IRQ_HANDLED;
 }
 
@@ -134,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
 		goto error;
 	}
 	xc->esc_virq_names[prio] = name;
+
+	/* In single escalation mode, we grab the ESB MMIO of the
+	 * interrupt and mask it. Also populate the VCPU v/raddr
+	 * of the ESB page for use by asm entry/exit code. Finally
+	 * set the XIVE_IRQ_NO_EOI flag which will prevent the
+	 * core code from performing an EOI on the escalation
+	 * interrupt, thus leaving it effectively masked after
+	 * it fires once.
+	 */
+	if (xc->xive->single_escalation) {
+		struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+		xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+		vcpu->arch.xive_esc_raddr = xd->eoi_page;
+		vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+		xd->flags |= XIVE_IRQ_NO_EOI;
+	}
+
 	return 0;
 error:
 	irq_dispose_mapping(xc->esc_virq[prio]);
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
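
The entry-path masking dance in the asm above can be restated in C; a
sketch only, built from the ESB helpers introduced earlier in the
series (the real code must use ldcix on the ESB real address since it
runs in real mode):

    /* Mask the escalation source: PQ <- 01, returning the old state. */
    u64 prev = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);

    /* If P was set, the escalation interrupt is already on its way
     * to the host queue: leave xive_esc_on set and let xive_esc_irq()
     * clear it, so a quick re-cede cannot re-fire the interrupt and
     * land it in the queue twice. */
    if (!(prev & XIVE_ESB_VAL_P))
            vcpu->arch.xive_esc_on = false;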

* [PATCH v2 09/11] powerpc/kvm: Make "no_xive:" label local
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index ec66b0e07f47..83862fba8cfb 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -986,7 +986,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	/* We are entering the guest on that thread, push VCPU to XIVE */
 	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
 	cmpldi	cr0, r10, 0
-	beq	no_xive
+	beq	.Lno_xive
 	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
 	li	r9, TM_QW1_OS
 	eieio
@@ -1042,7 +1042,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	li	r0, 0
 	stb	r0, VCPU_XIVE_ESC_ON(r4)
 1:
-no_xive:
+.Lno_xive:
 #endif /* CONFIG_KVM_XICS */
 
 deliver_guest_interrupt:
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH v2 10/11] powerpc/kvm: Store the MMU mode in the PACA on KVM exit
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

This stores the MMU mode (real vs. virt) in the PACA on exceptions
that do a KVM test.

At the moment, we do this unconditionally in those exceptions due
to how the macro system is put together. In the future we could
find a way to only do it when actually exiting a guest.

This will avoid a pile of mfmsr instructions in the KVM exit path.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/exception-64s.h  | 38 ++++++++++++++++++++++---------
 arch/powerpc/include/asm/kvm_book3s_asm.h |  1 +
 arch/powerpc/kernel/asm-offsets.c         |  1 +
 arch/powerpc/kernel/exceptions-64s.S      | 15 +++++++-----
 4 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a318973af05..0921328aea78 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -238,7 +238,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	EXCEPTION_PROLOG_1(area, extra, vec);				\
 	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
-#define __KVMTEST(h, n)							\
+#define __KVMTEST(h, n, v)						\
+	li	r10,v;							\
+	stb	r10,HSTATE_EXIT_VIRT(r13);				\
 	lbz	r10,HSTATE_IN_GUEST(r13);				\
 	cmpwi	r10,0;							\
 	bne	do_kvm_##h##n
@@ -348,12 +350,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	b	kvmppc_skip_##h##interrupt
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define KVMTEST(h, n)			__KVMTEST(h, n)
+#define KVMTEST(h, n, v)		__KVMTEST(h, n, v)
 #define KVM_HANDLER(area, h, n)		__KVM_HANDLER(area, h, n)
 #define KVM_HANDLER_SKIP(area, h, n)	__KVM_HANDLER_SKIP(area, h, n)
 
 #else
-#define KVMTEST(h, n)
+#define KVMTEST(h, n, v)
 #define KVM_HANDLER(area, h, n)
 #define KVM_HANDLER_SKIP(area, h, n)
 #endif
@@ -477,10 +479,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define STD_RELON_EXCEPTION_HV(loc, vec, label)		\
 	SET_SCRATCH0(r13);	/* save r13 */		\
 	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label,	\
-				       EXC_HV, KVMTEST_HV, vec);
+				       EXC_HV, KVMTEST_RELON_HV, vec);
 
 #define STD_RELON_EXCEPTION_HV_OOL(vec, label)			\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec);	\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_RELON_HV, vec);	\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /* This associate vector numbers with bits in paca->irq_happened */
@@ -501,18 +503,32 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define _SOFTEN_TEST(h, vec)	__SOFTEN_TEST(h, vec)
 
 #define SOFTEN_TEST_PR(vec)						\
-	KVMTEST(EXC_STD, vec);						\
+	KVMTEST(EXC_STD, vec, 0);					\
 	_SOFTEN_TEST(EXC_STD, vec)
 
 #define SOFTEN_TEST_HV(vec)						\
-	KVMTEST(EXC_HV, vec);						\
+	KVMTEST(EXC_HV, vec, 0);					\
+	_SOFTEN_TEST(EXC_HV, vec)
+
+#define SOFTEN_TEST_RELON_PR(vec)					\
+	KVMTEST(EXC_STD, vec, 1);					\
+	_SOFTEN_TEST(EXC_STD, vec)
+
+#define SOFTEN_TEST_RELON_HV(vec)					\
+	KVMTEST(EXC_HV, vec, 1);					\
 	_SOFTEN_TEST(EXC_HV, vec)
 
 #define KVMTEST_PR(vec)							\
-	KVMTEST(EXC_STD, vec)
+	KVMTEST(EXC_STD, vec, 0)
 
 #define KVMTEST_HV(vec)							\
-	KVMTEST(EXC_HV, vec)
+	KVMTEST(EXC_HV, vec, 0)
+
+#define KVMTEST_RELON_PR(vec)						\
+	KVMTEST(EXC_STD, vec, 1)
+
+#define KVMTEST_RELON_HV(vec)						\
+	KVMTEST(EXC_HV, vec, 1)
 
 #define SOFTEN_NOTEST_PR(vec)		_SOFTEN_TEST(EXC_STD, vec)
 #define SOFTEN_NOTEST_HV(vec)		_SOFTEN_TEST(EXC_HV, vec)
@@ -557,10 +573,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label)			\
 	_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label,			\
-					  EXC_HV, SOFTEN_TEST_HV)
+					  EXC_HV, SOFTEN_TEST_RELON_HV)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label)			\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec);		\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_RELON_HV, vec);	\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 83596f32f50b..7775a278e56e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -118,6 +118,7 @@ struct kvmppc_host_state {
 	void __iomem *xive_tima_phys;
 	void __iomem *xive_tima_virt;
 	u32 saved_xirr;
+	u8 exit_virt;		/* MMU mode on exception exit */
 	u64 dabr;
 	u64 host_mmcr[7];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
 	u32 host_pmc[8];
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index eff521c67ec3..87e3cd09659c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -641,6 +641,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
 	HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
 	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+	HSTATE_FIELD(HSTATE_EXIT_VIRT, exit_virt);
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_PTID, ptid);
 	HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1c80bd292e48..8a0cd4fdf015 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -700,9 +700,9 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
 	.globl hardware_interrupt_relon_hv;
 hardware_interrupt_relon_hv:
 	BEGIN_FTR_SECTION
-		_MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV)
+		_MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_RELON_HV)
 	FTR_SECTION_ELSE
-		_MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
+		_MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_RELON_PR)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 
@@ -870,21 +870,24 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 	 * Userspace syscalls have already saved the PPR, hcalls must save
 	 * it before setting HMT_MEDIUM.
 	 */
-#define SYSCALL_KVMTEST							\
+#define __SYSCALL_KVMTEST(test)						\
 	mtctr	r13;							\
 	GET_PACA(r13);							\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
-	KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
+	test;	/* uses r10, branch to do_kvm_0xc00_system_call */	\
 	HMT_MEDIUM;							\
 	mfctr	r9;
+#define SYSCALL_KVMTEST		__SYSCALL_KVMTEST(KVMTEST_PR(0xc00))
+#define SYSCALL_KVMTEST_RELON	__SYSCALL_KVMTEST(KVMTEST_RELON_PR(0xc00))
 
 #else
 #define SYSCALL_KVMTEST							\
 	HMT_MEDIUM;							\
 	mr	r9,r13;							\
 	GET_PACA(r13);
+#define SYSCALL_KVMTEST_RELON SYSCALL_KVMTEST
 #endif
-	
+
 #define LOAD_SYSCALL_HANDLER(reg)					\
 	__LOAD_HANDLER(reg, system_call_common)
 
@@ -948,7 +951,7 @@ EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
 EXC_REAL_END(system_call, 0xc00, 0x100)
 
 EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
-	SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
+	SYSCALL_KVMTEST_RELON /* loads PACA into r13, and saves r13 to r9 */
 	SYSCALL_FASTENDIAN_TEST
 	SYSCALL_VIRT
 	SYSCALL_FASTENDIAN
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
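
In C terms, the two instructions added to __KVMTEST record a single
byte of state. The helper below is hypothetical (the real code is the
asm macro), but it shows the invariant the rest of the series relies
on; the field is the one this patch adds to kvmppc_host_state.

	/* Sketch: what __KVMTEST(h, n, v) records before branching to
	 * the KVM handler.  'v' is 1 for relocation-on (virtual mode)
	 * exception vectors and 0 for the real-mode ones, so any exit
	 * that went through a KVM test knows its MMU mode. */
	static inline void kvmtest_record_mmu_mode(bool relocation_on)
	{
		local_paca->kvm_hstate.exit_virt = relocation_on;
	}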

* [PATCH v2 11/11] powerpc/kvm: Use the PACA virt/real mode info instead of mfmsr
  2017-11-23  4:36 ` Benjamin Herrenschmidt
@ 2017-11-23  4:36   ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-23  4:36 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc, Benjamin Herrenschmidt

This shaves off a bunch of cycles from the KVM exit path
and the XIVE-related hypercall.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kvm/book3s_hv_builtin.c    |  5 ++---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 16 ++++++++++------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 90644db9d38e..45525b76b724 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -209,8 +209,7 @@ long kvmppc_h_random(struct kvm_vcpu *vcpu)
 {
 	int r;
 
-	/* Only need to do the expensive mfmsr() on radix */
-	if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
+	if (local_paca->kvm_hstate.exit_virt)
 		r = powernv_get_random_long(&vcpu->arch.gpr[4]);
 	else
 		r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]);
@@ -524,7 +523,7 @@ static long kvmppc_read_one_intr(bool *again)
 #ifdef CONFIG_KVM_XICS
 static inline bool is_rm(void)
 {
-	return !(mfmsr() & MSR_DR);
+	return !local_paca->kvm_hstate.exit_virt;
 }
 
 unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 83862fba8cfb..ade13f7a9077 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -81,6 +81,8 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	RFI
 
 kvmppc_call_hv_entry:
+	li	r0,0
+	stb	r0,HSTATE_EXIT_VIRT(r13)
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_hv_entry
 
@@ -176,8 +178,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	 * we will be in virtual mode at this point, which makes it a
 	 * little easier to get back to the caller.
 	 */
-	mfmsr	r0
-	andi.	r0, r0, MSR_IR		/* in real mode? */
+	lbz	r0,HSTATE_EXIT_VIRT(r13)
+	cmpwi	r0,0
 	bne	.Lvirt_return
 
 	/* RFI into the highmem handler */
@@ -1440,8 +1442,9 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	beq	1f
 	li	r7, TM_SPC_PULL_OS_CTX
 	li	r6, TM_QW1_OS
-	mfmsr	r0
-	andi.	r0, r0, MSR_DR		/* in real mode? */
+	/* Are we in virtual or real mode ? */
+	lbz	r0,HSTATE_EXIT_VIRT(r13)
+	cmpwi	r0,0
 	beq	2f
 	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
 	cmpldi	cr0, r10, 0
@@ -2749,8 +2752,9 @@ kvm_cede_exit:
 	stb	r0, VCPU_CEDED(r9)
 1:	/* Enable XIVE escalation */
 	li	r5, XIVE_ESB_SET_PQ_00
-	mfmsr	r0
-	andi.	r0, r0, MSR_DR		/* in real mode? */
+	/* Are we in virtual or real mode ? */
+	lbz	r0,HSTATE_EXIT_VIRT(r13)
+	cmpwi	r0,0
 	beq	1f
 	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
 	cmpdi	r10, 0
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 36+ messages in thread
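
The cached flag can drive any virt-vs-real decision on the exit path.
As a usage illustration, the helper below (hypothetical, but it mirrors
what the TIMA hunk above does in asm) picks between the two existing
hstate mappings without touching the MSR.

	/* Sketch: choose the TIMA mapping from the cached MMU mode
	 * instead of an mfmsr; both hstate fields already exist. */
	static inline void __iomem *kvmppc_tima_addr(void)
	{
		if (local_paca->kvm_hstate.exit_virt)
			return local_paca->kvm_hstate.xive_tima_virt;
		return local_paca->kvm_hstate.xive_tima_phys;
	}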

* Re: [PATCH v2 10/11] powerpc/kvm: Store the MMU mode in the PACA on KVM exit
  2017-11-23  4:36   ` Benjamin Herrenschmidt
@ 2017-11-24 15:15     ` kbuild test robot
  -1 siblings, 0 replies; 36+ messages in thread
From: kbuild test robot @ 2017-11-24 15:15 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: kbuild-all, Paul Mackerras, kvm, kvm-ppc, Benjamin Herrenschmidt

[-- Attachment #1: Type: text/plain, Size: 2195 bytes --]

Hi Benjamin,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.14 next-20171124]
[cannot apply to kvm-ppc/kvm-ppc-next]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Benjamin-Herrenschmidt/powerpc-kvm-xive-Add-more-debugfs-queues-info/20171124-190202
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

   arch/powerpc/kernel/exceptions-64s.S: Assembler messages:
>> arch/powerpc/kernel/exceptions-64s.S:803: Error: Fixed entry overflow
   arch/powerpc/kernel/exceptions-64s.S:803: Fatal error: .abort detected.  Abandoning ship.

vim +803 arch/powerpc/kernel/exceptions-64s.S

c78d9b974 Nicholas Piggin        2016-09-21  801  
a5d4f3ad3 Benjamin Herrenschmidt 2011-04-05  802  
1a6822d19 Nicholas Piggin        2016-12-06 @803  EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
1a6822d19 Nicholas Piggin        2016-12-06  804  EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
39c0da57a Nicholas Piggin        2016-09-21  805  TRAMP_KVM(PACA_EXGEN, 0x900)
39c0da57a Nicholas Piggin        2016-09-21  806  EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
39c0da57a Nicholas Piggin        2016-09-21  807  
a485c7098 Paul Mackerras         2013-04-25  808  

:::::: The code at line 803 was first introduced by commit
:::::: 1a6822d194c3f627eeb6aaca6688a5d0a444663e powerpc/64s: Use (start, size) rather than (start, end) for exception handlers

:::::: TO: Nicholas Piggin <npiggin@gmail.com>
:::::: CC: Michael Ellerman <mpe@ellerman.id.au>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 24020 bytes --]

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2 08/11] powerpc/kvm/xive: Keep escalation interrupt masked unless ceded
  2017-11-23  4:36   ` Benjamin Herrenschmidt
@ 2017-11-25  4:56     ` Paul Mackerras
  -1 siblings, 0 replies; 36+ messages in thread
From: Paul Mackerras @ 2017-11-25  4:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: kvm, kvm-ppc

On Thu, Nov 23, 2017 at 03:36:16PM +1100, Benjamin Herrenschmidt wrote:
> This works on top of the single escalation support. When in single
> escalation, with this change, we will keep the escalation interrupt
> disabled unless the VCPU is in H_CEDE (idle). In any other case, we
> know the VCPU will be rescheduled and thus there is no need to take
> escalation interrupts in the host whenever a guest interrupt fires.

Some comments below...

> @@ -2705,7 +2740,32 @@ kvm_cede_prodded:
>  	/* we've ceded but we want to give control to the host */
>  kvm_cede_exit:
>  	ld	r9, HSTATE_KVM_VCPU(r13)
> -	b	guest_exit_cont
> +#ifdef CONFIG_KVM_XICS
> +	/* Abort if we still have a pending escalation */

This comment might be clearer if you say "Cancel cede" rather than
"Abort".

> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index eef9ccafdc09..7a047bc88f11 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -89,6 +89,17 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
>  	if (vcpu->arch.ceded)
>  		kvmppc_fast_vcpu_kick(vcpu);
>  
> +	/* Since we have the no-EOI flag, the interrupt is effectively
> +	 * disabled now. Clearing xive_esc_on means we won't bother
> +	 * doing so on the next entry.
> +	 *
> +	 * This also allows the entry code to know that if a PQ combination
> +	 * of 10 is observed while xive_esc_on is true, it means the queue
> +	 * contains an unprocessed escalation interrupt. We don't make use of
> > +	 * that knowledge today but might (see comment in book3s_hv_rmhandlers.S).

Is "We don't make use of that knowledge" actually true?  I thought we
did make use of it in the assembly code in book3s_hv_rmhandlers.S (in
the code that this patch adds).  In what way don't we make use of it?

Paul.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2 10/11] powerpc/kvm: Store the MMU mode in the PACA on KVM exit
  2017-11-23  4:36   ` Benjamin Herrenschmidt
@ 2017-11-25  5:03     ` Paul Mackerras
  -1 siblings, 0 replies; 36+ messages in thread
From: Paul Mackerras @ 2017-11-25  5:03 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: kvm, kvm-ppc

On Thu, Nov 23, 2017 at 03:36:18PM +1100, Benjamin Herrenschmidt wrote:
> This stores the MMU mode (real vs. virt) in the PACA on exceptions
> that do a KVM test.
> 
> At the moment, we do this unconditionally in those exceptions due
> to how the macro system is put together. In the future we could
> find a way to only do it when actually exiting a guest.
> 
> This will avoid a pile of mfmsr in the KVM exit path

Comments below...

> diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
> index 83596f32f50b..7775a278e56e 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> @@ -118,6 +118,7 @@ struct kvmppc_host_state {
>  	void __iomem *xive_tima_phys;
>  	void __iomem *xive_tima_virt;
>  	u32 saved_xirr;
> +	u8 exit_virt;		/* MMU mode on exception exit */

This is being added in the #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
section of this struct, but as far as I can see we will need the new
field in the case where only PR and not HV is configured.  Did you try
compiling a PR-only config?

Paul.

^ permalink raw reply	[flat|nested] 36+ messages in thread
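
A minimal sketch of the adjustment being asked for, assuming that
CONFIG_KVM_BOOK3S_64_HANDLER (which already guards __KVMTEST) is the
right condition; only the members relevant to the discussion are
shown, so this is not the actual struct layout.

	/* Sketch: exit_virt moved out of the HV-only block so that a
	 * PR-only configuration, which still runs the KVM exception
	 * tests, can compile and use it. */
	struct kvmppc_host_state {
		u8 in_guest;
	#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
		u8 exit_virt;		/* MMU mode on exception exit */
	#endif
		/* HV-only fields stay under CONFIG_KVM_BOOK3S_HV_POSSIBLE */
	};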

* Re: [PATCH v2 11/11] powerpc/kvm: Use the PACA virt/real mode info instead of mfmsr
  2017-11-23  4:36   ` Benjamin Herrenschmidt
@ 2017-11-25  5:17     ` Paul Mackerras
  -1 siblings, 0 replies; 36+ messages in thread
From: Paul Mackerras @ 2017-11-25  5:17 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: kvm, kvm-ppc

On Thu, Nov 23, 2017 at 03:36:19PM +1100, Benjamin Herrenschmidt wrote:
> This shaves off a bunch of cycles from the KVM exit path
> and the XIVE related hypercall.

Comment below...

> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 83862fba8cfb..ade13f7a9077 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -81,6 +81,8 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
>  	RFI
>  
>  kvmppc_call_hv_entry:
> +	li	r0,0
> +	stb	r0,HSTATE_EXIT_VIRT(r13)

Don't we need to clear HSTATE_EXIT_VIRT(r13) similarly in the entry
path for offline secondary CPU threads, i.e. somewhere around the
kvm_secondary_got_guest label?

Paul.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2 08/11] powerpc/kvm/xive: Keep escalation interrupt masked unless ceded
  2017-11-25  4:56     ` Paul Mackerras
@ 2017-11-26 21:55       ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-26 21:55 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc

On Sat, 2017-11-25 at 15:56 +1100, Paul Mackerras wrote:
> On Thu, Nov 23, 2017 at 03:36:16PM +1100, Benjamin Herrenschmidt wrote:
> > This works on top of the single escalation support. When in single
> > escalation, with this change, we will keep the escalation interrupt
> > disabled unless the VCPU is in H_CEDE (idle). In any other case, we
> > know the VCPU will be rescheduled and thus there is no need to take
> > escalation interrupts in the host whenever a guest interrupt fires.
> 
> Some comments below...
> 
> > @@ -2705,7 +2740,32 @@ kvm_cede_prodded:
> >  	/* we've ceded but we want to give control to the host */
> >  kvm_cede_exit:
> >  	ld	r9, HSTATE_KVM_VCPU(r13)
> > -	b	guest_exit_cont
> > +#ifdef CONFIG_KVM_XICS
> > +	/* Abort if we still have a pending escalation */
> 
> This comment might be clearer if you say "Cancel cede" rather than
> "Abort".
> 
> > diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> > index eef9ccafdc09..7a047bc88f11 100644
> > --- a/arch/powerpc/kvm/book3s_xive.c
> > +++ b/arch/powerpc/kvm/book3s_xive.c
> > @@ -89,6 +89,17 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
> >  	if (vcpu->arch.ceded)
> >  		kvmppc_fast_vcpu_kick(vcpu);
> >  
> > +	/* Since we have the no-EOI flag, the interrupt is effectively
> > +	 * disabled now. Clearing xive_esc_on means we won't bother
> > +	 * doing so on the next entry.
> > +	 *
> > +	 * This also allows the entry code to know that if a PQ combination
> > +	 * of 10 is observed while xive_esc_on is true, it means the queue
> > +	 * contains an unprocessed escalation interrupt. We don't make use of
> > > +	 * that knowledge today but might (see comment in book3s_hv_rmhandlers.S).
> 
> Is "We don't make use of that knowledge" actually true?  I thought we
> did make use of it in the assembly code in book3s_hv_rmhandlers.S (in
> the code that this patch adds).  In what way don't we make use of it?

Obsolete comment, sorry.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2 10/11] powerpc/kvm: Store the MMU mode in the PACA on KVM exit
  2017-11-25  5:03     ` Paul Mackerras
@ 2017-11-26 21:57       ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-26 21:57 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc

On Sat, 2017-11-25 at 16:03 +1100, Paul Mackerras wrote:
> On Thu, Nov 23, 2017 at 03:36:18PM +1100, Benjamin Herrenschmidt wrote:
> > This stores the MMU mode (real vs. virt) in the PACA on exceptions
> > that do a KVM test.
> > 
> > At the moment, we do this unconditionally in those exceptions due
> > to how the macro system is put together. In the future we could
> > find a way to only do it when actually exiting a guest.
> > 
> > This will avoid a pile of mfmsr in the KVM exit path
> 
> Comments below...
> 
> > diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
> > index 83596f32f50b..7775a278e56e 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> > @@ -118,6 +118,7 @@ struct kvmppc_host_state {
> >  	void __iomem *xive_tima_phys;
> >  	void __iomem *xive_tima_virt;
> >  	u32 saved_xirr;
> > +	u8 exit_virt;		/* MMU mode on exception exit */
> 
> This is being added in the #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> section of this struct, but as far as I can see we will need the new
> field in the case where only PR and not HV is configured.  Did you try
> compiling a PR-only config?

Nope. There is another problem: it causes the fixed handler to be
too big for at least one interrupt (I haven't checked which one; it's
the build bot that notified me). I need to re-think that. I might have
to either do separate do_* entry points or something like that too...

Ben.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2 11/11] powerpc/kvm: Use the PACA virt/real mode info instead of mfmsr
  2017-11-25  5:17     ` Paul Mackerras
@ 2017-11-26 21:58       ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 36+ messages in thread
From: Benjamin Herrenschmidt @ 2017-11-26 21:58 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm, kvm-ppc

On Sat, 2017-11-25 at 16:17 +1100, Paul Mackerras wrote:
> On Thu, Nov 23, 2017 at 03:36:19PM +1100, Benjamin Herrenschmidt wrote:
> > This shaves off a bunch of cycles from the KVM exit path
> > and the XIVE related hypercall.
> 
> Comment below...
> 
> > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > index 83862fba8cfb..ade13f7a9077 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > @@ -81,6 +81,8 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
> >  	RFI
> >  
> >  kvmppc_call_hv_entry:
> > +	li	r0,0
> > +	stb	r0,HSTATE_EXIT_VIRT(r13)
> 
> Don't we need to clear HSTATE_EXIT_VIRT(r13) similarly in the entry
> path for offline secondary CPU threads, i.e. somewhere around the
> kvm_secondary_got_guest label?

Quite possibly; I am not too familiar with that path. I'll have a look.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 36+ messages in thread

Thread overview:
2017-11-23  4:36 [PATCH v2 01/11] powerpc/kvm/xive: Add more debugfs queues info Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 02/11] powerpc/kvm/xive: Enable use of the new "single escalation" feature Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 03/11] powerpc/kvm/xive: Don't use existing "prodded" flag for xive escalations Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 04/11] powerpc/kvm/xive: Check DR not IR to chose real vs virt mode MMIOs Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 05/11] powerpc/kvm/xive: Make xive_pushed a byte, not a word Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 06/11] powerpc/xive: Move definition of ESB bits Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 07/11] powerpc/xive: Add interrupt flag to disable automatic EOI Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 08/11] powerpc/kvm/xive: Keep escalation interrupt masked unless ceded Benjamin Herrenschmidt
2017-11-25  4:56   ` Paul Mackerras
2017-11-26 21:55     ` Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 09/11] powerpc/kvm: Make "no_xive:" label local Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 10/11] powerpc/kvm: Store the MMU mode in the PACA on KVM exit Benjamin Herrenschmidt
2017-11-24 15:15   ` kbuild test robot
2017-11-25  5:03   ` Paul Mackerras
2017-11-26 21:57     ` Benjamin Herrenschmidt
2017-11-23  4:36 ` [PATCH v2 11/11] powerpc/kvm: Use the PACA virt/real mode info instead of mfmsr Benjamin Herrenschmidt
2017-11-25  5:17   ` Paul Mackerras
2017-11-26 21:58     ` Benjamin Herrenschmidt