From: Gavin Shan <gshan@redhat.com>
To: kvmarm@lists.cs.columbia.edu
Cc: maz@kernel.org, sudeep.holla@arm.com, shan.gavin@gmail.com,
	catalin.marinas@arm.com, will@kernel.org,
	linux-arm-kernel@lists.infradead.org
Subject: [PATCH RFCv1 7/7] arm64: Support async page fault
Date: Fri, 10 Apr 2020 18:58:20 +1000
Message-ID: <20200410085820.758686-8-gshan@redhat.com>
In-Reply-To: <20200410085820.758686-1-gshan@redhat.com>

This adds asynchronous page fault support for the guest. The design
is similar to x86: on receiving a PAGE_NOT_PRESENT signal from the
hypervisor, the current task is either rescheduled or put into a
power-saving state (WFI). The task is woken up when the corresponding
PAGE_READY signal is received.

Both signals are conveyed through a data abort with a specific
(IMPDEF) Data Fault Status Code (DFSC). In addition, a hash table is
introduced to track the tasks that have been put into the waiting
state, so that waits and wake-ups stay consistent even when they
arrive out of order.

The feature is guarded by CONFIG_KVM_GUEST, which is introduced by
this patch.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
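The race the sleeper hash guards against is that a PAGE_READY
wake-up can arrive before the PAGE_NOT_PRESENT wait for the same
token has been queued. Below is a self-contained user-space model of
that handshake (illustrative only, not part of the patch: the names
are hypothetical, error handling is omitted, and the kernel code
further down uses per-bucket raw spinlocks, swait queues and WFI
rather than a plain list and printf):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define HASHBITS	8
#define HASHSIZE	(1 << HASHBITS)

struct sleep_node {
	struct sleep_node *next;
	unsigned int token;
	bool dummy;		/* wake-up arrived before the wait */
};

static struct sleep_node *sleepers[HASHSIZE];

/* Locate the slot holding @token, or the list tail if absent */
static struct sleep_node **find(unsigned int token)
{
	struct sleep_node **p = &sleepers[token & (HASHSIZE - 1)];

	while (*p && (*p)->token != token)
		p = &(*p)->next;

	return p;
}

static void push(unsigned int token, bool dummy)
{
	struct sleep_node *n = calloc(1, sizeof(*n));

	n->token = token;
	n->dummy = dummy;
	n->next = sleepers[token & (HASHSIZE - 1)];
	sleepers[token & (HASHSIZE - 1)] = n;
}

/* PAGE_NOT_PRESENT: consume an early wake-up, or start waiting */
static void pf_wait(unsigned int token)
{
	struct sleep_node **p = find(token), *n = *p;

	if (n && n->dummy) {
		*p = n->next;
		free(n);
		printf("token %u: wake-up preceded wait, continue\n", token);
		return;
	}

	push(token, false);
	printf("token %u: task parked\n", token);
}

/* PAGE_READY: wake the waiter, or queue a dummy entry if none yet */
static void pf_wake(unsigned int token)
{
	struct sleep_node **p = find(token), *n = *p;

	if (n) {
		*p = n->next;
		free(n);
		printf("token %u: waiter woken\n", token);
		return;
	}

	push(token, true);
	printf("token %u: no waiter yet, dummy entry queued\n", token);
}

int main(void)
{
	pf_wait(42);	/* normal order: fault first ... */
	pf_wake(42);	/* ... then ready */
	pf_wake(7);	/* inverted order: ready first ... */
	pf_wait(7);	/* ... the fault then consumes the dummy entry */
	return 0;
}

In the patch itself, kvm_pv_async_pf_wait() and kvm_pv_async_pf_wake()
in arch/arm64/mm/fault.c implement the same two paths under the
per-bucket lock, and the token value ~0 is treated as a broadcast that
wakes every sleeper queued on the current CPU.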
 arch/arm64/Kconfig                 |  11 ++
 arch/arm64/include/asm/exception.h |   5 +
 arch/arm64/include/asm/kvm_para.h  |  42 ++++-
 arch/arm64/kernel/smp.c            |  47 ++++++
 arch/arm64/mm/fault.c              | 239 ++++++++++++++++++++++++++++-
 5 files changed, 336 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 40fb05d96c60..2d5e5ee62d6d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1045,6 +1045,17 @@ config PARAVIRT
 	  under a hypervisor, potentially improving performance significantly
 	  over full virtualization.
 
+config KVM_GUEST
+	bool "KVM Guest Support"
+	depends on PARAVIRT
+	default y
+	help
+	  This option enables various optimizations for running under the KVM
+	  hypervisor. Overhead for the kernel when not running inside KVM should
+	  be minimal.
+
+	  If in doubt, say Y.
+
 config PARAVIRT_TIME_ACCOUNTING
 	bool "Paravirtual steal time accounting"
 	select PARAVIRT
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 7a6e81ca23a8..17ac2db36472 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -46,4 +46,9 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
 void do_cp15instr(unsigned int esr, struct pt_regs *regs);
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
+
+#ifdef CONFIG_KVM_GUEST
+void kvm_pv_async_pf_enable(void);
+void kvm_pv_async_pf_disable(void);
+#endif /* CONFIG_KVM_GUEST */
 #endif	/* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/kvm_para.h b/arch/arm64/include/asm/kvm_para.h
index 0ea481dd1c7a..a43bed479c2b 100644
--- a/arch/arm64/include/asm/kvm_para.h
+++ b/arch/arm64/include/asm/kvm_para.h
@@ -3,6 +3,30 @@
 #define _ASM_ARM_KVM_PARA_H
 
 #include <uapi/asm/kvm_para.h>
+#include <linux/of.h>
+
+#ifdef CONFIG_KVM_GUEST
+static inline int kvm_para_available(void)
+{
+	struct device_node *hyper_node;
+	int ret = 0;
+
+	hyper_node = of_find_node_by_path("/hypervisor");
+	if (!hyper_node)
+		return 0;
+
+	if (of_device_is_compatible(hyper_node, "linux,kvm"))
+		ret = 1;
+
+	of_node_put(hyper_node);
+	return ret;
+}
+#else
+static inline int kvm_para_available(void)
+{
+	return 0;
+}
+#endif /* CONFIG_KVM_GUEST */
 
 static inline bool kvm_check_and_clear_guest_paused(void)
 {
@@ -11,17 +35,21 @@ static inline bool kvm_check_and_clear_guest_paused(void)
 
 static inline unsigned int kvm_arch_para_features(void)
 {
-	return 0;
+	struct device_node *hyper_node;
+	unsigned int features = 0;
+
+	if (!kvm_para_available())
+		return 0;
+
+	hyper_node = of_find_node_by_path("/hypervisor");
+	of_property_read_u32(hyper_node, "para-features", &features);
+	of_node_put(hyper_node);
+
+	return features;
 }
 
 static inline unsigned int kvm_arch_para_hints(void)
 {
 	return 0;
 }
-
-static inline bool kvm_para_available(void)
-{
-	return false;
-}
-
 #endif /* _ASM_ARM_KVM_PARA_H */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 061f60fe452f..cc97a8462d7f 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -40,6 +40,7 @@
 #include <asm/cputype.h>
 #include <asm/cpu_ops.h>
 #include <asm/daifflags.h>
+#include <asm/exception.h>
 #include <asm/kvm_mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
@@ -443,6 +444,38 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	mark_linear_text_alias_ro();
 }
 
+#ifdef CONFIG_KVM_GUEST
+static void kvm_cpu_reboot(void *unused)
+{
+	kvm_pv_async_pf_disable();
+}
+
+static int kvm_cpu_reboot_notify(struct notifier_block *nb,
+				 unsigned long code, void *unused)
+{
+	if (code == SYS_RESTART)
+		on_each_cpu(kvm_cpu_reboot, NULL, 1);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_cpu_reboot_nb = {
+	.notifier_call = kvm_cpu_reboot_notify,
+};
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+	kvm_pv_async_pf_enable();
+	return 0;
+}
+
+static int kvm_cpu_offline(unsigned int cpu)
+{
+	kvm_pv_async_pf_disable();
+	return 0;
+}
+#endif /* CONFIG_KVM_GUEST */
+
 void __init smp_prepare_boot_cpu(void)
 {
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
@@ -458,6 +491,20 @@ void __init smp_prepare_boot_cpu(void)
 	/* Conditionally switch to GIC PMR for interrupt masking */
 	if (system_uses_irq_prio_masking())
 		init_gic_priority_masking();
+
+
+	/* Enable async page fault */
+#ifdef CONFIG_KVM_GUEST
+	register_reboot_notifier(&kvm_cpu_reboot_nb);
+	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+		"arm/kvm:online", kvm_cpu_online, kvm_cpu_offline) < 0) {
+		pr_warn("%s: Failed to install cpu hotplug callbacks\n",
+			__func__);
+		return;
+	}
+
+	kvm_pv_async_pf_enable();
+#endif /* CONFIG_KVM_GUEST */
 }
 
 static u64 __init of_get_cpu_mpidr(struct device_node *dn)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1027851d469a..39c7570fe303 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -19,10 +19,12 @@
 #include <linux/page-flags.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
+#include <linux/swait.h>
 #include <linux/highmem.h>
 #include <linux/perf_event.h>
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>
+#include <linux/kvm_para.h>
 
 #include <asm/acpi.h>
 #include <asm/bug.h>
@@ -48,8 +50,31 @@ struct fault_info {
 	const char *name;
 };
 
+#ifdef CONFIG_KVM_GUEST
+#define KVM_TASK_SLEEP_HASHBITS		8
+#define KVM_TASK_SLEEP_HASHSIZE	(1 << KVM_TASK_SLEEP_HASHBITS)
+
+struct kvm_task_sleep_node {
+	struct hlist_node	link;
+	struct swait_queue_head	wq;
+	u32			token;
+	int			cpu;
+	bool			halted;
+};
+
+struct kvm_task_sleep_head {
+	raw_spinlock_t		lock;
+	struct hlist_head	list;
+};
+#endif /* CONFIG_KVM_GUEST */
+
 static const struct fault_info fault_info[];
 static struct fault_info debug_fault_info[];
+#ifdef CONFIG_KVM_GUEST
+static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_data) __aligned(64);
+static struct kvm_task_sleep_head async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
+static bool async_pf_initialized;
+#endif
 
 static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
 {
@@ -623,6 +648,178 @@ static int do_alignment_fault(unsigned long addr, unsigned int esr,
 	return 0;
 }
 
+#ifdef CONFIG_KVM_GUEST
+static struct kvm_task_sleep_node *kvm_pv_async_pf_find(
+		struct kvm_task_sleep_head *b, u32 token)
+{
+	struct kvm_task_sleep_node *n;
+	struct hlist_node *p;
+
+	hlist_for_each(p, &b->list) {
+		n = hlist_entry(p, typeof(*n), link);
+		if (n->token == token)
+			return n;
+	}
+
+	return NULL;
+}
+
+static void kvm_pv_async_pf_wait(u32 token, int interrupt_kernel)
+{
+	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+	struct kvm_task_sleep_node n, *e;
+	DECLARE_SWAITQUEUE(wait);
+
+	raw_spin_lock(&b->lock);
+	e = kvm_pv_async_pf_find(b, token);
+	if (e) {
+		/* dummy entry exists -> wake-up was delivered ahead of PF */
+		hlist_del(&e->link);
+		kfree(e);
+		raw_spin_unlock(&b->lock);
+
+		return;
+	}
+
+	n.token = token;
+	n.cpu = smp_processor_id();
+	n.halted = is_idle_task(current) ||
+		   (IS_ENABLED(CONFIG_PREEMPT_COUNT) ?
+			preempt_count() > 1 || rcu_preempt_depth() :
+			interrupt_kernel);
+	init_swait_queue_head(&n.wq);
+	hlist_add_head(&n.link, &b->list);
+	raw_spin_unlock(&b->lock);
+
+	for (;;) {
+		if (!n.halted) {
+			prepare_to_swait_exclusive(&n.wq, &wait,
+						   TASK_UNINTERRUPTIBLE);
+		}
+
+		if (hlist_unhashed(&n.link))
+			break;
+
+		/*
+		 * Enable IRQs explicitly. Otherwise the task
+		 * won't be scheduled or woken up properly.
+		 */
+		local_irq_enable();
+
+		if (!n.halted) {
+			schedule();
+		} else {
+			dsb(sy);
+			wfi();
+		}
+
+		local_irq_disable();
+	}
+
+	if (!n.halted)
+		finish_swait(&n.wq, &wait);
+}
+
+static inline void kvm_pv_async_pf_wake_one(struct kvm_task_sleep_node *n)
+{
+	/* The task will be woken up once it has been detached */
+	hlist_del_init(&n->link);
+
+	if (!n->halted)
+		swake_up_one(&n->wq);
+	else
+		smp_send_reschedule(n->cpu);
+}
+
+static void kvm_pv_async_pf_wake_all(void)
+{
+	struct kvm_task_sleep_head *b;
+	struct kvm_task_sleep_node *n;
+	struct hlist_node *p, *next;
+	int i;
+
+	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
+		b = &async_pf_sleepers[i];
+
+		raw_spin_lock(&b->lock);
+
+		hlist_for_each_safe(p, next, &b->list) {
+			n = hlist_entry(p, typeof(*n), link);
+			if (n->cpu != smp_processor_id())
+				continue;
+
+			kvm_pv_async_pf_wake_one(n);
+		}
+
+		raw_spin_unlock(&b->lock);
+	}
+}
+
+static void kvm_pv_async_pf_wake(u32 token)
+{
+	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+	struct kvm_task_sleep_node *n;
+
+	if (token == ~0) {
+		kvm_pv_async_pf_wake_all();
+		return;
+	}
+
+again:
+	raw_spin_lock(&b->lock);
+
+	n = kvm_pv_async_pf_find(b, token);
+	if (!n) {
+		/*
+		 * The async PF hasn't been handled yet. Add a dummy
+		 * entry for the token. On allocation failure, busy
+		 * wait until another CPU has handled the async PF.
+		 */
+		n = kzalloc(sizeof(*n), GFP_ATOMIC);
+		if (!n) {
+			raw_spin_unlock(&b->lock);
+			cpu_relax();
+			goto again;
+		}
+		n->token = token;
+		n->cpu = smp_processor_id();
+		init_swait_queue_head(&n->wq);
+		hlist_add_head(&n->link, &b->list);
+	} else {
+		kvm_pv_async_pf_wake_one(n);
+	}
+
+	raw_spin_unlock(&b->lock);
+}
+#endif /* CONFIG_KVM_GUEST */
+
+static int do_lockdown(unsigned long addr, unsigned int esr,
+		       struct pt_regs *regs)
+{
+#ifdef CONFIG_KVM_GUEST
+	u32 reason = 0;
+
+	if (__this_cpu_read(apf_data.enabled)) {
+		reason = __this_cpu_read(apf_data.reason);
+		__this_cpu_write(apf_data.reason, 0);
+	}
+
+	switch (reason) {
+	case KVM_PV_REASON_PAGE_NOT_PRESENT:
+		kvm_pv_async_pf_wait((u32)addr, !user_mode(regs));
+		return 0;
+	case KVM_PV_REASON_PAGE_READY:
+		kvm_pv_async_pf_wake((u32)addr);
+		return 0;
+	}
+#endif /* CONFIG_KVM_GUEST */
+
+	pr_info("%s: addr=0x%lx, esr=0x%x\n", __func__, addr, esr);
+	return 1;
+}
+
 static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
 	return 1; /* "fault" */
@@ -703,7 +900,8 @@ static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGKILL, SI_KERNEL,	"Unsupported atomic hardware update fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 50"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 51"			},
-	{ do_bad,		SIGKILL, SI_KERNEL,	"implementation fault (lockdown abort)" },
+	{ do_lockdown,		SIGKILL, SI_KERNEL,
+	  "implementation fault (lockdown abort)" },
 	{ do_bad,		SIGBUS,  BUS_OBJERR,	"implementation fault (unsupported exclusive)" },
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 54"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 55"			},
@@ -878,3 +1076,42 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
 	debug_exception_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug_exception);
+
+#ifdef CONFIG_KVM_GUEST
+void kvm_pv_async_pf_enable(void)
+{
+	u64 pa;
+	int i;
+
+	if (!kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) ||
+	     __this_cpu_read(apf_data.enabled))
+		return;
+
+	if (!async_pf_initialized) {
+		async_pf_initialized = true;
+		for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
+			raw_spin_lock_init(&async_pf_sleepers[i].lock);
+	}
+
+	/* FIXME: Enable KVM_ASYNC_PF_SEND_ALWAYS on CONFIG_PREEMPTION */
+	pa = virt_to_phys(this_cpu_ptr(&apf_data));
+	pa |= KVM_ASYNC_PF_ENABLED;
+
+	__this_cpu_write(apf_data.enabled, 1);
+	write_sysreg_s(pa, SYS_ASYNC_PF_EL1);
+
+	pr_info("Async PF enabled on CPU %d\n", smp_processor_id());
+}
+
+void kvm_pv_async_pf_disable(void)
+{
+	if (!kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) ||
+	    !__this_cpu_read(apf_data.enabled))
+		return;
+
+	write_sysreg_s(0, SYS_ASYNC_PF_EL1);
+	__this_cpu_write(apf_data.enabled, 0);
+
+	pr_info("Async PF disabled on CPU %d\n", smp_processor_id());
+}
+#endif /* CONFIG_KVM_GUEST */
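
A brief usage sketch (again illustrative, not part of the patch):
with the paravirt plumbing above in place, guest code can gate
features on async PF support through the generic helpers.
kvm_para_has_feature() is the stock <linux/kvm_para.h> wrapper around
the kvm_arch_para_features() added earlier in this patch, and the
hypothetical guest_has_async_pf() below is only an example caller:

#include <linux/kvm_para.h>

static bool guest_has_async_pf(void)
{
	/*
	 * kvm_para_available() looks for the "linux,kvm" compatible
	 * string on the /hypervisor device-tree node, and
	 * kvm_arch_para_features() reads its "para-features" property.
	 */
	return kvm_para_available() &&
	       kvm_para_has_feature(KVM_FEATURE_ASYNC_PF);
}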
-- 
2.23.0

