kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Bharata B Rao <bharata@linux.ibm.com>
To: kvm-ppc@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
Cc: kvm@vger.kernel.org, aneesh.kumar@linux.ibm.com,
	bharata.rao@gmail.com, Bharata B Rao <bharata@linux.ibm.com>
Subject: [RFC PATCH v0 5/5] pseries: Asynchronous page fault support
Date: Thu,  5 Aug 2021 12:54:39 +0530	[thread overview]
Message-ID: <20210805072439.501481-6-bharata@linux.ibm.com> (raw)
In-Reply-To: <20210805072439.501481-1-bharata@linux.ibm.com>

Add asynchronous page fault support for pseries guests.

1. Set up the guest to handle async-pf
   - Issue H_REG_SNS hcall to register the SNS region.
   - Set up the subvention interrupt irq.
   - Enable async-pf by updating the byte_b9 of VPA for each
     CPU.
2. Check if the page fault is an expropriation notification
   (SRR1_PROGTRAP set in SRR1) and if so put the task on
   a wait queue based on the expropriation correlation number
   read from the VPA.
3. Handle subvention interrupt to wake any waiting tasks.
   The wait and wakeup mechanism from x86 async-pf implementation
   is being reused here.

TODO:
- Check how to keep this feature together with other CMO features.
- The async-pf check in the page fault handler path is limited to
  guests only by an #ifdef. This isn't sufficient and hence needs to
  be replaced by an appropriate check.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 arch/powerpc/include/asm/async-pf.h       |  12 ++
 arch/powerpc/mm/fault.c                   |   7 +-
 arch/powerpc/platforms/pseries/Makefile   |   2 +-
 arch/powerpc/platforms/pseries/async-pf.c | 219 ++++++++++++++++++++++
 4 files changed, 238 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/include/asm/async-pf.h
 create mode 100644 arch/powerpc/platforms/pseries/async-pf.c

diff --git a/arch/powerpc/include/asm/async-pf.h b/arch/powerpc/include/asm/async-pf.h
new file mode 100644
index 000000000000..95d6c3da9f50
--- /dev/null
+++ b/arch/powerpc/include/asm/async-pf.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Async page fault support via PAPR Expropriation/Subvention Notification
+ * option(ESN)
+ *
+ * Copyright 2020 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
+ */
+
+#ifndef _ASM_POWERPC_ASYNC_PF_H
+#define _ASM_POWERPC_ASYNC_PF_H
+int handle_async_page_fault(struct pt_regs *regs, unsigned long addr);
+#endif /* _ASM_POWERPC_ASYNC_PF_H */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a8d0ce85d39a..bbdc61605885 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -44,7 +44,7 @@
 #include <asm/debug.h>
 #include <asm/kup.h>
 #include <asm/inst.h>
-
+#include <asm/async-pf.h>
 
 /*
  * do_page_fault error handling helpers
@@ -395,6 +395,11 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	vm_fault_t fault, major = 0;
 	bool kprobe_fault = kprobe_page_fault(regs, 11);
 
+#ifdef CONFIG_PPC_PSERIES
+	if (handle_async_page_fault(regs, address))
+		return 0;
+#endif
+
 	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
 		return 0;
 
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 4cda0ef87be0..e0ada605ef20 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,7 +6,7 @@ obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   of_helpers.o \
 			   setup.o iommu.o event_sources.o ras.o \
 			   firmware.o power.o dlpar.o mobility.o rng.o \
-			   pci.o pci_dlpar.o eeh_pseries.o msi.o
+			   pci.o pci_dlpar.o eeh_pseries.o msi.o async-pf.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
 obj-$(CONFIG_KEXEC_CORE)	+= kexec.o
diff --git a/arch/powerpc/platforms/pseries/async-pf.c b/arch/powerpc/platforms/pseries/async-pf.c
new file mode 100644
index 000000000000..c2f3bbc0d674
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/async-pf.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Async page fault support via PAPR Expropriation/Subvention Notification
+ * option(ESN)
+ *
+ * Copyright 2020 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/swait.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/paca.h>
+
+static char sns_buffer[PAGE_SIZE] __aligned(4096);
+static uint16_t *esn_q = (uint16_t *)sns_buffer + 1;	/* queue starts 2 bytes in */
+static unsigned long next_eq_entry, nr_eq_entries;	/* read index, entry count */
+
+#define ASYNC_PF_SLEEP_HASHBITS 8
+#define ASYNC_PF_SLEEP_HASHSIZE (1<<ASYNC_PF_SLEEP_HASHBITS)
+
+/* Serializes the interrupt handler's walk of the queue in the SNS buffer */
+static DEFINE_RAW_SPINLOCK(async_sns_guest_lock);
+
+/* Wait queue handling is from the x86 async-pf implementation */
+struct async_pf_sleep_node {
+	struct hlist_node link;		/* entry in a hash bucket's list */
+	struct swait_queue_head wq;	/* the faulting task waits on this */
+	u64 token;			/* value the node is hashed and matched by */
+	int cpu;			/* CPU that filled in the node */
+};
+
+static struct async_pf_sleep_head {
+	raw_spinlock_t lock;		/* held around every access to list */
+	struct hlist_head list;
+} async_pf_sleepers[ASYNC_PF_SLEEP_HASHSIZE];
+
+static struct async_pf_sleep_node *_find_apf_task(struct async_pf_sleep_head *b,
+						  u64 token)
+{
+	struct async_pf_sleep_node *node;
+
+	/* Scan bucket b for the first node carrying this token. */
+	/* Callers in this file take b->lock before calling. */
+	hlist_for_each_entry(node, &b->list, link) {
+		if (node->token == token)
+			return node;
+	}
+
+	return NULL;
+}
+static int async_pf_queue_task(u64 token, struct async_pf_sleep_node *n)
+{
+	struct async_pf_sleep_head *bucket;
+	struct async_pf_sleep_node *dummy;
+
+	bucket = &async_pf_sleepers[hash_64(token, ASYNC_PF_SLEEP_HASHBITS)];
+	raw_spin_lock(&bucket->lock);
+	dummy = _find_apf_task(bucket, token);
+	if (dummy) {
+		/* The wakeup beat us here and left a dummy node: consume it */
+		hlist_del(&dummy->link);
+		raw_spin_unlock(&bucket->lock);
+		kfree(dummy);
+		return false;	/* caller need not sleep */
+	}
+
+	n->token = token;
+	n->cpu = smp_processor_id();
+	init_swait_queue_head(&n->wq);
+	hlist_add_head(&n->link, &bucket->list);	/* now visible to the waker */
+	raw_spin_unlock(&bucket->lock);
+	return true;	/* caller must wait for the wakeup */
+}
+
+/*
+ * Handle Expropriation notification. Returns 1 if the task waited here, else 0.
+ */
+int handle_async_page_fault(struct pt_regs *regs, unsigned long addr)
+{
+	struct async_pf_sleep_node n;
+	DECLARE_SWAITQUEUE(wait);
+	unsigned long exp_corr_nr;
+
+	/* Test the SRR1 image saved at entry; the live SPR may be clobbered */
+	if (!(regs->msr & SRR1_PROGTRAP))
+		return 0;
+
+	if (unlikely(!user_mode(regs)))
+		panic("Host injected async PF in kernel mode\n");
+
+	exp_corr_nr = be16_to_cpu(get_lppaca()->exp_corr_nr);
+	if (!async_pf_queue_task(exp_corr_nr, &n))
+		return 0;	/* wakeup was already delivered: no need to wait */
+
+	for (;;) {
+		prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+		if (hlist_unhashed(&n.link))	/* the waker has removed our node */
+			break;
+
+		local_irq_enable();
+		schedule();
+		local_irq_disable();
+	}
+
+	finish_swait(&n.wq, &wait);
+	return 1;
+}
+
+static void apf_task_wake_one(struct async_pf_sleep_node *n)
+{
+	hlist_del_init(&n->link);	/* unhashed node is the sleeper's exit test */
+	if (swq_has_sleeper(&n->wq))	/* the task may not be on wq yet */
+		swake_up_one(&n->wq);
+}
+
+static void async_pf_wake_task(u64 token)
+{
+	u64 key = hash_64(token, ASYNC_PF_SLEEP_HASHBITS);
+	struct async_pf_sleep_head *b = &async_pf_sleepers[key];
+	struct async_pf_sleep_node *n;
+
+again:
+	raw_spin_lock(&b->lock);
+	n = _find_apf_task(b, token);
+	if (!n) {
+		/*
+		 * No task has queued itself for this token yet. Leave a
+		 * dummy entry so async_pf_queue_task() sees the wakeup.
+		 */
+		n = kzalloc(sizeof(*n), GFP_ATOMIC);
+		if (!n) {
+			/*
+			 * Allocation failed! Drop the lock and retry until
+			 * it succeeds or the task has queued its own node.
+			 */
+			raw_spin_unlock(&b->lock);
+			cpu_relax();
+			goto again;
+		}
+		n->token = token;
+		n->cpu = smp_processor_id();
+		init_swait_queue_head(&n->wq);
+		hlist_add_head(&n->link, &b->list);
+	} else {
+		apf_task_wake_one(n);	/* unhash the node and wake its sleeper */
+	}
+	raw_spin_unlock(&b->lock);
+}
+
+/*
+ * Subvention interrupt: consume queued tokens, waking the waiter of each one.
+ */
+static irqreturn_t async_pf_handler(int irq, void *dev_id)
+{
+	uint16_t token, prev;
+
+	raw_spin_lock(&async_sns_guest_lock);
+	for (;;) {
+		token = esn_q[next_eq_entry];
+		if (token == 0)		/* empty slot: queue is drained */
+			break;
+
+		prev = arch_cmpxchg(&esn_q[next_eq_entry], token, 0);
+		BUG_ON(prev != token);	/* slot changed under us */
+
+		async_pf_wake_task(token);
+		next_eq_entry = (next_eq_entry + 1) % nr_eq_entries;
+	}
+	raw_spin_unlock(&async_sns_guest_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Register the SNS buffer and irq, then enable async PF. Returns 0 or -errno.
+ */
+static int __init pseries_async_pf_init(void)
+{
+	long rc;
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	unsigned int irq, cpu, i;
+
+	/* Register buffer via H_REG_SNS */
+	rc = plpar_hcall(H_REG_SNS, ret, __pa(sns_buffer), PAGE_SIZE);
+	if (rc != H_SUCCESS)
+		return -ENODEV;
+
+	nr_eq_entries = (PAGE_SIZE - 2) / sizeof(uint16_t);
+
+	/* Register irq handler */
+	irq = irq_create_mapping(NULL, ret[1]);
+	if (!irq) {
+		plpar_hcall(H_REG_SNS, ret, -1, PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	rc = request_irq(irq, async_pf_handler, 0, "sns-interrupt", NULL);
+	if (rc < 0) {
+		irq_dispose_mapping(irq);	/* don't leak the mapping */
+		plpar_hcall(H_REG_SNS, ret, -1, PAGE_SIZE);
+		return rc;
+	}
+
+	for (i = 0; i < ASYNC_PF_SLEEP_HASHSIZE; i++)
+		raw_spin_lock_init(&async_pf_sleepers[i].lock);
+
+	/* Bit 0 of SNS buffer byte 0 enables subvention notifications */
+	*sns_buffer |= 0x1;
+
+	/* Enable LPPACA_EXP_INT_ENABLED in VPA */
+	for_each_possible_cpu(cpu)
+		lppaca_of(cpu).byte_b9 |= LPPACA_EXP_INT_ENABLED;
+
+	pr_info("%s: Enabled Async PF\n", __func__);
+	return 0;
+}
+
+machine_arch_initcall(pseries, pseries_async_pf_init);
-- 
2.31.1


  parent reply	other threads:[~2021-08-05  7:25 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-05  7:24 [RFC PATCH v0 0/5] PPC: KVM: pseries: Asynchronous page fault Bharata B Rao
2021-08-05  7:24 ` [RFC PATCH v0 1/5] powerpc: Define Expropriation interrupt bit to VPA byte offset 0xB9 Bharata B Rao
2021-08-05  7:24 ` [RFC PATCH v0 2/5] KVM: PPC: Add support for KVM_REQ_ESN_EXIT Bharata B Rao
2021-08-05  7:24 ` [RFC PATCH v0 3/5] KVM: PPC: Book3S: Enable setting SRR1 flags for DSI Bharata B Rao
2021-08-05  7:24 ` [RFC PATCH v0 4/5] KVM: PPC: BOOK3S HV: Async PF support Bharata B Rao
2021-08-05  7:24 ` Bharata B Rao [this message]
2021-08-13  4:06   ` [RFC PATCH v0 5/5] pseries: Asynchronous page fault support Nicholas Piggin
2021-08-13  4:54     ` Bharata B Rao
2021-08-05  7:35 ` [RFC PATCH v0 0/5] PPC: KVM: pseries: Asynchronous page fault Bharata B Rao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210805072439.501481-6-bharata@linux.ibm.com \
    --to=bharata@linux.ibm.com \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=bharata.rao@gmail.com \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).