linux-mips.vger.kernel.org archive mirror
* [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait
       [not found] <20200324044453.15733-1-dave@stgolabs.net>
@ 2020-03-24  4:44 ` Davidlohr Bueso
  2020-03-24 12:07   ` kbuild test robot
                     ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Davidlohr Bueso @ 2020-03-24  4:44 UTC (permalink / raw)
  To: tglx, pbonzini
  Cc: bigeasy, peterz, rostedt, torvalds, will, joel, linux-kernel,
	kvm, dave, Paul Mackerras, kvmarm, linux-mips, Davidlohr Bueso

The use of any sort of waitqueue (simple or regular) for
waiting/waking vcpus has always been overkill and semantically
wrong. Because this is per-vcpu (which is blocked), there is
only ever a single waiting vcpu, thus no need for any sort of
queue.

As such, make use of the rcuwait primitive, with the following
considerations:

  - rcuwait already provides the proper barriers that serialize
  concurrent waiter and waker.

  - Task wakeup is done in rcu read critical region, with a
  stable task pointer.

  - Because there is no concurrency among waiters, we need
  not worry about rcuwait_wait_event() calls corrupting
  the wait->task. As a consequence, this saves the locking
  done in swait when modifying the queue. This also applies
  to per-vcore wait for powerpc kvm-hv.

The x86-tscdeadline_latency test mentioned in 8577370fb0cb
("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
latency is reduced by around 15% with this change.

This patch also changes TASK_INTERRUPTIBLE to TASK_IDLE, as
kvm is (ab)using the former so that idle vcpus do not contribute
to the loadavg. Let's use the correct semantics for this.
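
To illustrate the resulting pattern (condensed from the hunks below,
not additional code): rcuwait is essentially a single RCU-protected
task pointer, so the blocked vcpu publishes itself and a waker simply
dereferences it, with no queue or lock involved.

/* waiter side: the (single) blocked vcpu */
rcuwait_wait_event(&vcpu->wait,
		   (block_check = kvm_vcpu_check_block(vcpu)) < 0,
		   TASK_IDLE);

/* waker side, e.g. timer interrupt injection or async_pf completion */
if (rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)))
	++vcpu->stat.halt_wakeup;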

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: kvmarm@lists.cs.columbia.edu
Cc: linux-mips@vger.kernel.org
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 arch/mips/kvm/mips.c                  |  6 ++----
 arch/powerpc/include/asm/kvm_book3s.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h   |  2 +-
 arch/powerpc/kvm/book3s_hv.c          | 22 ++++++++--------------
 arch/powerpc/kvm/powerpc.c            |  2 +-
 arch/x86/kvm/lapic.c                  |  2 +-
 include/linux/kvm_host.h              | 10 +++++-----
 virt/kvm/arm/arch_timer.c             |  2 +-
 virt/kvm/arm/arm.c                    |  9 +++++----
 virt/kvm/async_pf.c                   |  3 +--
 virt/kvm/kvm_main.c                   | 31 +++++++++++--------------------
 11 files changed, 37 insertions(+), 54 deletions(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 71244bf87c3a..e049fcb3dffb 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -290,8 +290,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
 	kvm_mips_callbacks->queue_timer_int(vcpu);
 
 	vcpu->arch.wait = 0;
-	if (swq_has_sleeper(&vcpu->wq))
-		swake_up_one(&vcpu->wq);
+	rcuwait_wake_up(&vcpu->wait)
 
 	return kvm_mips_count_timeout(vcpu);
 }
@@ -517,8 +516,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 
 	dvcpu->arch.wait = 0;
 
-	if (swq_has_sleeper(&dvcpu->wq))
-		swake_up_one(&dvcpu->wq);
+	rcuwait_wake_up(&dvcpu->wait);
 
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 506e4df2d730..6e5d85ba588d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -78,7 +78,7 @@ struct kvmppc_vcore {
 	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
 	struct list_head preempt_list;
 	spinlock_t lock;
-	struct swait_queue_head wq;
+	struct rcuwait wait;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6e8b8ffd06ad..e2b4a1e3fb7d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -752,7 +752,7 @@ struct kvm_vcpu_arch {
 	u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
 	u32 last_inst;
 
-	struct swait_queue_head *wqp;
+	struct rcuwait *waitp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2cefd071b848..1a7a22122211 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -231,13 +231,11 @@ static bool kvmppc_ipi_thread(int cpu)
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
 	int cpu;
-	struct swait_queue_head *wqp;
+	struct rcuwait *wait;
 
-	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (swq_has_sleeper(wqp)) {
-		swake_up_one(wqp);
+	wait = kvm_arch_vcpu_get_wait(vcpu);
+	if (rcuwait_wake_up(wait))
 		++vcpu->stat.halt_wakeup;
-	}
 
 	cpu = READ_ONCE(vcpu->arch.thread_cpu);
 	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -2116,7 +2114,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
 
 	spin_lock_init(&vcore->lock);
 	spin_lock_init(&vcore->stoltb_lock);
-	init_swait_queue_head(&vcore->wq);
+	rcuwait_init(&vcore->wait);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = id;
@@ -3779,7 +3777,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	ktime_t cur, start_poll, start_wait;
 	int do_sleep = 1;
 	u64 block_ns;
-	DECLARE_SWAITQUEUE(wait);
 
 	/* Poll for pending exceptions and ceded state */
 	cur = start_poll = ktime_get();
@@ -3807,10 +3804,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 		}
 	}
 
-	prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
-
 	if (kvmppc_vcore_check_block(vc)) {
-		finish_swait(&vc->wq, &wait);
 		do_sleep = 0;
 		/* If we polled, count this as a successful poll */
 		if (vc->halt_poll_ns)
@@ -3823,8 +3817,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_SLEEPING;
 	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
-	schedule();
-	finish_swait(&vc->wq, &wait);
+	rcuwait_wait_event(&vc->wait,
+			   kvmppc_vcore_check_block(vc), TASK_IDLE);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
 	trace_kvmppc_vcore_blocked(vc, 1);
@@ -3935,7 +3929,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			swake_up_one(&vc->wq);
+		        rcuwait_wake_up(&vc->wait);
 		}
 
 	}
@@ -4274,7 +4268,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 	user_vrsave = mfspr(SPRN_VRSAVE);
 
-	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+	vcpu->arch.waitp = &vcpu->arch.vcore->wait;
 	vcpu->arch.pgdir = kvm->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 302e9dccdd6d..32a0fab53fc9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -754,7 +754,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	if (err)
 		goto out_vcpu_uninit;
 
-	vcpu->arch.wqp = &vcpu->wq;
+	vcpu->arch.waitp = &vcpu->wait;
 	kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
 	return 0;
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e3099c642fec..a4420c26dfbc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1815,7 +1815,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 	/* If the preempt notifier has already run, it also called apic_timer_expired */
 	if (!apic->lapic_timer.hv_timer_in_use)
 		goto out;
-	WARN_ON(swait_active(&vcpu->wq));
+	WARN_ON(rcu_dereference(vcpu->wait.task));
 	cancel_hv_timer(apic);
 	apic_timer_expired(apic);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bcb9b2ac0791..93ab0ab66de5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,7 +23,7 @@
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
-#include <linux/swait.h>
+#include <linux/rcuwait.h>
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <asm/signal.h>
@@ -277,7 +277,7 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	struct swait_queue_head wq;
+	struct rcuwait wait;
 	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -952,12 +952,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
 }
 #endif
 
-static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
-	return vcpu->arch.wqp;
+	return vcpu->arch.waitp;
 #else
-	return &vcpu->wq;
+	return &vcpu->wait;
 #endif
 }
 
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 0d9438e9de2a..4be71cb58691 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	if (map.emul_ptimer)
 		soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
-	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+	if (rcu_dereference(kvm_arch_vcpu_get_wait(vcpu)->task) != NULL)
 		kvm_timer_blocking(vcpu);
 
 	/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index eda7b624eab8..98740a5b8c9b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		vcpu->arch.pause = false;
-		swake_up_one(kvm_arch_vcpu_wq(vcpu));
+		rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
 	}
 }
 
 static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 {
-	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
 
-	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
-				       (!vcpu->arch.pause)));
+	rcuwait_wait_event(*wait,
+			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+			   TASK_IDLE);
 
 	if (vcpu->arch.power_off || vcpu->arch.pause) {
 		/* Awaken to handle a signal, request we sleep again later. */
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 15e5b037f92d..10b533f641a6 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,8 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 
 	trace_kvm_async_pf_completed(addr, cr2_or_gpa);
 
-	if (swq_has_sleeper(&vcpu->wq))
-		swake_up_one(&vcpu->wq);
+	rcuwait_wake_up(&vcpu->wait);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 70f03ce0e5c1..887efb39fb1a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -343,7 +343,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
-	init_swait_queue_head(&vcpu->wq);
+	rcuwait_init(&vcpu->wait);
 	kvm_async_pf_vcpu_init(vcpu);
 
 	vcpu->pre_pcpu = -1;
@@ -2465,9 +2465,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	ktime_t start, cur;
-	DECLARE_SWAITQUEUE(wait);
-	bool waited = false;
 	u64 block_ns;
+	int block_check = -EINTR;
 
 	kvm_arch_vcpu_blocking(vcpu);
 
@@ -2491,17 +2490,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		} while (single_task_running() && ktime_before(cur, stop));
 	}
 
-	for (;;) {
-		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
-
-		if (kvm_vcpu_check_block(vcpu) < 0)
-			break;
-
-		waited = true;
-		schedule();
-	}
-
-	finish_swait(&vcpu->wq, &wait);
+	rcuwait_wait_event(&vcpu->wait,
+			   (block_check = kvm_vcpu_check_block(vcpu)) < 0,
+			   TASK_IDLE);
 	cur = ktime_get();
 out:
 	kvm_arch_vcpu_unblocking(vcpu);
@@ -2525,18 +2516,17 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
+	trace_kvm_vcpu_wakeup(block_ns, !block_check, vcpu_valid_wakeup(vcpu));
 	kvm_arch_vcpu_block_finish(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 {
-	struct swait_queue_head *wqp;
+	struct rcuwait *wait;
 
-	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (swq_has_sleeper(wqp)) {
-		swake_up_one(wqp);
+	wait = kvm_arch_vcpu_get_wait(vcpu);
+	if (rcuwait_wake_up(wait)) {
 		WRITE_ONCE(vcpu->ready, true);
 		++vcpu->stat.halt_wakeup;
 		return true;
@@ -2678,7 +2668,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			if (vcpu == me)
 				continue;
-			if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+			if (rcu_dereference(vcpu->wait.task) &&
+			    !vcpu_dy_runnable(vcpu))
 				continue;
 			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
 				!kvm_arch_vcpu_in_kernel(vcpu))
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait
  2020-03-24  4:44 ` [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait Davidlohr Bueso
@ 2020-03-24 12:07   ` kbuild test robot
       [not found]   ` <202003250014.iSvLXrUS%lkp@intel.com>
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: kbuild test robot @ 2020-03-24 12:07 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: kbuild-all, tglx, pbonzini, bigeasy, peterz, rostedt, torvalds,
	will, joel, linux-kernel, kvm, dave, Paul Mackerras, kvmarm,
	linux-mips, Davidlohr Bueso

[-- Attachment #1: Type: text/plain, Size: 4465 bytes --]

Hi Davidlohr,

I love your patch! Yet something to improve:

[auto build test ERROR on next-20200323]
[also build test ERROR on v5.6-rc7]
[cannot apply to kvm/linux-next kvmarm/next linus/master kvm-ppc/kvm-ppc-next v5.6-rc7 v5.6-rc6 v5.6-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Davidlohr-Bueso/kvm-Use-rcuwait-for-vcpu-blocking/20200324-155230
base:    5149100c3aebe5e640d6ff68e0b5e5a7eb8638e0
config: s390-randconfig-a001-20200324 (attached as .config)
compiler: s390-linux-gcc (GCC) 9.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=9.2.0 make.cross ARCH=s390 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arch/s390/kvm/../../../virt/kvm/kvm_main.c: In function 'kvm_vcpu_block':
>> arch/s390/kvm/../../../virt/kvm/kvm_main.c:2698:16: error: macro "rcuwait_wait_event" passed 3 arguments, but takes just 2
    2698 |       TASK_IDLE);
         |                ^
   In file included from include/linux/percpu-rwsem.h:7,
                    from include/linux/fs.h:34,
                    from include/linux/huge_mm.h:8,
                    from include/linux/mm.h:681,
                    from include/linux/kvm_host.h:14,
                    from arch/s390/kvm/../../../virt/kvm/kvm_main.c:18:
   include/linux/rcuwait.h:33: note: macro "rcuwait_wait_event" defined here
      33 | #define rcuwait_wait_event(w, condition)    \
         | 
>> arch/s390/kvm/../../../virt/kvm/kvm_main.c:2696:2: error: 'rcuwait_wait_event' undeclared (first use in this function)
    2696 |  rcuwait_wait_event(&vcpu->wait,
         |  ^~~~~~~~~~~~~~~~~~
   arch/s390/kvm/../../../virt/kvm/kvm_main.c:2696:2: note: each undeclared identifier is reported only once for each function it appears in

vim +/rcuwait_wait_event +2698 arch/s390/kvm/../../../virt/kvm/kvm_main.c

  2664	
  2665	/*
  2666	 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  2667	 */
  2668	void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  2669	{
  2670		ktime_t start, cur;
  2671		u64 block_ns;
  2672		int block_check = -EINTR;
  2673	
  2674		kvm_arch_vcpu_blocking(vcpu);
  2675	
  2676		start = cur = ktime_get();
  2677		if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
  2678			ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
  2679	
  2680			++vcpu->stat.halt_attempted_poll;
  2681			do {
  2682				/*
  2683				 * This sets KVM_REQ_UNHALT if an interrupt
  2684				 * arrives.
  2685				 */
  2686				if (kvm_vcpu_check_block(vcpu) < 0) {
  2687					++vcpu->stat.halt_successful_poll;
  2688					if (!vcpu_valid_wakeup(vcpu))
  2689						++vcpu->stat.halt_poll_invalid;
  2690					goto out;
  2691				}
  2692				cur = ktime_get();
  2693			} while (single_task_running() && ktime_before(cur, stop));
  2694		}
  2695	
> 2696		rcuwait_wait_event(&vcpu->wait,
  2697				   (block_check = kvm_vcpu_check_block(vcpu)) < 0,
> 2698				   TASK_IDLE);
  2699		cur = ktime_get();
  2700	out:
  2701		kvm_arch_vcpu_unblocking(vcpu);
  2702		block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
  2703	
  2704		if (!kvm_arch_no_poll(vcpu)) {
  2705			if (!vcpu_valid_wakeup(vcpu)) {
  2706				shrink_halt_poll_ns(vcpu);
  2707			} else if (halt_poll_ns) {
  2708				if (block_ns <= vcpu->halt_poll_ns)
  2709					;
  2710				/* we had a long block, shrink polling */
  2711				else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
  2712					shrink_halt_poll_ns(vcpu);
  2713				/* we had a short halt and our poll time is too small */
  2714				else if (vcpu->halt_poll_ns < halt_poll_ns &&
  2715					block_ns < halt_poll_ns)
  2716					grow_halt_poll_ns(vcpu);
  2717			} else {
  2718				vcpu->halt_poll_ns = 0;
  2719			}
  2720		}
  2721	
  2722		trace_kvm_vcpu_wakeup(block_ns, !block_check, vcpu_valid_wakeup(vcpu));
  2723		kvm_arch_vcpu_block_finish(vcpu);
  2724	}
  2725	EXPORT_SYMBOL_GPL(kvm_vcpu_block);
  2726	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 29095 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait
       [not found]   ` <202003250014.iSvLXrUS%lkp@intel.com>
@ 2020-03-24 17:56     ` Davidlohr Bueso
  0 siblings, 0 replies; 15+ messages in thread
From: Davidlohr Bueso @ 2020-03-24 17:56 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, tglx, pbonzini, bigeasy, peterz, rostedt, torvalds,
	will, joel, linux-kernel, kvm, Paul Mackerras, kvmarm,
	linux-mips, Davidlohr Bueso

On Wed, 25 Mar 2020, kbuild test robot wrote:

>   arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function 'kvm_vcpu_block':
>>> arch/x86/kvm/../../../virt/kvm/kvm_main.c:2698:16: error: macro "rcuwait_wait_event" passed 3 arguments, but takes just 2
>          TASK_IDLE);

This is because this series relies on the rcuwait change:

https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?h=locking/core&id=80fbaf1c3f2926c834f8ca915441dfe27ce5487e
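
That commit adds a task-state argument to rcuwait_wait_event(), which is
how this series passes TASK_IDLE down. Roughly, the difference is (old
signature taken from the robot's note above, new one as this series
expects it; bodies elided):

/* without the above dependency (what the robot built against) */
#define rcuwait_wait_event(w, condition)		...

/* with it */
#define rcuwait_wait_event(w, condition, state)		...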

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait
  2020-03-24  4:44 ` [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait Davidlohr Bueso
  2020-03-24 12:07   ` kbuild test robot
       [not found]   ` <202003250014.iSvLXrUS%lkp@intel.com>
@ 2020-03-25 17:09   ` Paolo Bonzini
  2020-04-14 21:12     ` Davidlohr Bueso
  2020-04-20 16:41   ` [PATCH v2] " Davidlohr Bueso
  3 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2020-03-25 17:09 UTC (permalink / raw)
  To: Davidlohr Bueso, tglx
  Cc: bigeasy, peterz, rostedt, torvalds, will, joel, linux-kernel,
	kvm, Paul Mackerras, kvmarm, linux-mips, Davidlohr Bueso

On 24/03/20 05:44, Davidlohr Bueso wrote:
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index 71244bf87c3a..e049fcb3dffb 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -290,8 +290,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
>  	kvm_mips_callbacks->queue_timer_int(vcpu);
>  
>  	vcpu->arch.wait = 0;
> -	if (swq_has_sleeper(&vcpu->wq))
> -		swake_up_one(&vcpu->wq);
> +	rcuwait_wake_up(&vcpu->wait)

This is missing a semicolon.  (KVM MIPS is known not to compile and will
be changed to "depends on BROKEN" in 5.7).

Paolo

>  	return kvm_mips_count_timeout(vcpu);


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait
  2020-03-25 17:09   ` Paolo Bonzini
@ 2020-04-14 21:12     ` Davidlohr Bueso
  2020-04-15 12:11       ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2020-04-14 21:12 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: tglx, bigeasy, peterz, rostedt, torvalds, will, joel,
	linux-kernel, kvm, Paul Mackerras, kvmarm, linux-mips,
	Davidlohr Bueso

On Wed, 25 Mar 2020, Paolo Bonzini wrote:

>On 24/03/20 05:44, Davidlohr Bueso wrote:
>> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
>> index 71244bf87c3a..e049fcb3dffb 100644
>> --- a/arch/mips/kvm/mips.c
>> +++ b/arch/mips/kvm/mips.c
>> @@ -290,8 +290,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
>>	kvm_mips_callbacks->queue_timer_int(vcpu);
>>
>>	vcpu->arch.wait = 0;
>> -	if (swq_has_sleeper(&vcpu->wq))
>> -		swake_up_one(&vcpu->wq);
>> +	rcuwait_wake_up(&vcpu->wait)
>
>This is missing a semicolon.  (KVM MIPS is known not to compile and will
>be changed to "depends on BROKEN" in 5.7).

Do you want me to send another version with this fix or do you prefer
fixing it when/if picked up?

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait
  2020-04-14 21:12     ` Davidlohr Bueso
@ 2020-04-15 12:11       ` Paolo Bonzini
  0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2020-04-15 12:11 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: tglx, bigeasy, peterz, rostedt, torvalds, will, joel,
	linux-kernel, kvm, Paul Mackerras, kvmarm, linux-mips,
	Davidlohr Bueso

On 14/04/20 23:12, Davidlohr Bueso wrote:
> On Wed, 25 Mar 2020, Paolo Bonzini wrote:
> 
>> On 24/03/20 05:44, Davidlohr Bueso wrote:
>>> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
>>> index 71244bf87c3a..e049fcb3dffb 100644
>>> --- a/arch/mips/kvm/mips.c
>>> +++ b/arch/mips/kvm/mips.c
>>> @@ -290,8 +290,7 @@ static enum hrtimer_restart
>>> kvm_mips_comparecount_wakeup(struct hrtimer *timer)
>>>     kvm_mips_callbacks->queue_timer_int(vcpu);
>>>
>>>     vcpu->arch.wait = 0;
>>> -    if (swq_has_sleeper(&vcpu->wq))
>>> -        swake_up_one(&vcpu->wq);
>>> +    rcuwait_wake_up(&vcpu->wait)
>>
>> This is missing a semicolon.  (KVM MIPS is known not to compile and will
>> be changed to "depends on BROKEN" in 5.7).
> 
> Do you want me to send another version with this fix or do you prefer
> fixing it when/if picked up?

It's up to the TIP tree people, but sending a fixed version is probably
the best way to get their attention. :)

I can also queue it myself (for 5.7 even) if I get an Acked-by from
Peter though.

Paolo


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-03-24  4:44 ` [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait Davidlohr Bueso
                     ` (2 preceding siblings ...)
  2020-03-25 17:09   ` Paolo Bonzini
@ 2020-04-20 16:41   ` Davidlohr Bueso
  2020-04-20 17:12     ` Marc Zyngier
  3 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2020-04-20 16:41 UTC (permalink / raw)
  To: tglx, pbonzini
  Cc: bigeasy, peterz, rostedt, torvalds, will, joel, linux-kernel,
	kvm, Paul Mackerras, kvmarm, linux-mips, Davidlohr Bueso

The use of any sort of waitqueue (simple or regular) for
waiting/waking vcpus has always been overkill and semantically
wrong. Because this is per-vcpu (which is blocked), there is
only ever a single waiting vcpu, thus no need for any sort of
queue.

As such, make use of the rcuwait primitive, with the following
considerations:

  - rcuwait already provides the proper barriers that serialize
  concurrent waiter and waker.

  - Task wakeup is done in rcu read critical region, with a
  stable task pointer.

  - Because there is no concurrency among waiters, we need
  not worry about rcuwait_wait_event() calls corrupting
  the wait->task. As a consequence, this saves the locking
  done in swait when modifying the queue. This also applies
  to per-vcore wait for powerpc kvm-hv.

The x86-tscdeadline_latency test mentioned in 8577370fb0cb
("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
latency is reduced by around 15-20% with this change.

This patch also changes TASK_INTERRUPTIBLE to TASK_IDLE, as
kvm is (ab)using the former so that idle vcpus do not contribute
to the loadavg. Let's use the correct semantics for this.

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: kvmarm@lists.cs.columbia.edu
Cc: linux-mips@vger.kernel.org
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
v2: Added missing semicolon in mips change.

The rest of the patches in this series continues to apply on tip,
as such I am only sending a v2 for this particular patch.

 arch/mips/kvm/mips.c                  |  6 ++----
 arch/powerpc/include/asm/kvm_book3s.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h   |  2 +-
 arch/powerpc/kvm/book3s_hv.c          | 22 ++++++++--------------
 arch/powerpc/kvm/powerpc.c            |  2 +-
 arch/x86/kvm/lapic.c                  |  2 +-
 include/linux/kvm_host.h              | 10 +++++-----
 virt/kvm/arm/arch_timer.c             |  2 +-
 virt/kvm/arm/arm.c                    |  9 +++++----
 virt/kvm/async_pf.c                   |  3 +--
 virt/kvm/kvm_main.c                   | 31 +++++++++++--------------------
 11 files changed, 37 insertions(+), 54 deletions(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 71244bf87c3a..c14166dcac51 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -290,8 +290,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
 	kvm_mips_callbacks->queue_timer_int(vcpu);
 
 	vcpu->arch.wait = 0;
-	if (swq_has_sleeper(&vcpu->wq))
-		swake_up_one(&vcpu->wq);
+	rcuwait_wake_up(&vcpu->wait);
 
 	return kvm_mips_count_timeout(vcpu);
 }
@@ -517,8 +516,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 
 	dvcpu->arch.wait = 0;
 
-	if (swq_has_sleeper(&dvcpu->wq))
-		swake_up_one(&dvcpu->wq);
+	rcuwait_wake_up(&dvcpu->wait);
 
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 506e4df2d730..6e5d85ba588d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -78,7 +78,7 @@ struct kvmppc_vcore {
 	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
 	struct list_head preempt_list;
 	spinlock_t lock;
-	struct swait_queue_head wq;
+	struct rcuwait wait;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6e8b8ffd06ad..e2b4a1e3fb7d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -752,7 +752,7 @@ struct kvm_vcpu_arch {
 	u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
 	u32 last_inst;
 
-	struct swait_queue_head *wqp;
+	struct rcuwait *waitp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2cefd071b848..1a7a22122211 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -231,13 +231,11 @@ static bool kvmppc_ipi_thread(int cpu)
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
 	int cpu;
-	struct swait_queue_head *wqp;
+	struct rcuwait *wait;
 
-	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (swq_has_sleeper(wqp)) {
-		swake_up_one(wqp);
+	wait = kvm_arch_vcpu_get_wait(vcpu);
+	if (rcuwait_wake_up(wait))
 		++vcpu->stat.halt_wakeup;
-	}
 
 	cpu = READ_ONCE(vcpu->arch.thread_cpu);
 	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -2116,7 +2114,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
 
 	spin_lock_init(&vcore->lock);
 	spin_lock_init(&vcore->stoltb_lock);
-	init_swait_queue_head(&vcore->wq);
+	rcuwait_init(&vcore->wait);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = id;
@@ -3779,7 +3777,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	ktime_t cur, start_poll, start_wait;
 	int do_sleep = 1;
 	u64 block_ns;
-	DECLARE_SWAITQUEUE(wait);
 
 	/* Poll for pending exceptions and ceded state */
 	cur = start_poll = ktime_get();
@@ -3807,10 +3804,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 		}
 	}
 
-	prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
-
 	if (kvmppc_vcore_check_block(vc)) {
-		finish_swait(&vc->wq, &wait);
 		do_sleep = 0;
 		/* If we polled, count this as a successful poll */
 		if (vc->halt_poll_ns)
@@ -3823,8 +3817,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_SLEEPING;
 	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
-	schedule();
-	finish_swait(&vc->wq, &wait);
+	rcuwait_wait_event(&vc->wait,
+			   kvmppc_vcore_check_block(vc), TASK_IDLE);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
 	trace_kvmppc_vcore_blocked(vc, 1);
@@ -3935,7 +3929,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			swake_up_one(&vc->wq);
+		        rcuwait_wake_up(&vc->wait);
 		}
 
 	}
@@ -4274,7 +4268,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 	user_vrsave = mfspr(SPRN_VRSAVE);
 
-	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+	vcpu->arch.waitp = &vcpu->arch.vcore->wait;
 	vcpu->arch.pgdir = kvm->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 302e9dccdd6d..32a0fab53fc9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -754,7 +754,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	if (err)
 		goto out_vcpu_uninit;
 
-	vcpu->arch.wqp = &vcpu->wq;
+	vcpu->arch.waitp = &vcpu->wait;
 	kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
 	return 0;
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e3099c642fec..a4420c26dfbc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1815,7 +1815,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 	/* If the preempt notifier has already run, it also called apic_timer_expired */
 	if (!apic->lapic_timer.hv_timer_in_use)
 		goto out;
-	WARN_ON(swait_active(&vcpu->wq));
+	WARN_ON(rcu_dereference(vcpu->wait.task));
 	cancel_hv_timer(apic);
 	apic_timer_expired(apic);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bcb9b2ac0791..93ab0ab66de5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,7 +23,7 @@
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
-#include <linux/swait.h>
+#include <linux/rcuwait.h>
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <asm/signal.h>
@@ -277,7 +277,7 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	struct swait_queue_head wq;
+	struct rcuwait wait;
 	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -952,12 +952,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
 }
 #endif
 
-static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
-	return vcpu->arch.wqp;
+	return vcpu->arch.waitp;
 #else
-	return &vcpu->wq;
+	return &vcpu->wait;
 #endif
 }
 
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 0d9438e9de2a..4be71cb58691 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	if (map.emul_ptimer)
 		soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
-	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+	if (rcu_dereference(kvm_arch_vcpu_get_wait(vcpu)->task) != NULL)
 		kvm_timer_blocking(vcpu);
 
 	/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index eda7b624eab8..98740a5b8c9b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		vcpu->arch.pause = false;
-		swake_up_one(kvm_arch_vcpu_wq(vcpu));
+		rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
 	}
 }
 
 static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 {
-	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
 
-	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
-				       (!vcpu->arch.pause)));
+	rcuwait_wait_event(*wait,
+			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+			   TASK_IDLE);
 
 	if (vcpu->arch.power_off || vcpu->arch.pause) {
 		/* Awaken to handle a signal, request we sleep again later. */
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 15e5b037f92d..10b533f641a6 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,8 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 
 	trace_kvm_async_pf_completed(addr, cr2_or_gpa);
 
-	if (swq_has_sleeper(&vcpu->wq))
-		swake_up_one(&vcpu->wq);
+	rcuwait_wake_up(&vcpu->wait);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 70f03ce0e5c1..887efb39fb1a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -343,7 +343,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
-	init_swait_queue_head(&vcpu->wq);
+	rcuwait_init(&vcpu->wait);
 	kvm_async_pf_vcpu_init(vcpu);
 
 	vcpu->pre_pcpu = -1;
@@ -2465,9 +2465,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	ktime_t start, cur;
-	DECLARE_SWAITQUEUE(wait);
-	bool waited = false;
 	u64 block_ns;
+	int block_check = -EINTR;
 
 	kvm_arch_vcpu_blocking(vcpu);
 
@@ -2491,17 +2490,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		} while (single_task_running() && ktime_before(cur, stop));
 	}
 
-	for (;;) {
-		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
-
-		if (kvm_vcpu_check_block(vcpu) < 0)
-			break;
-
-		waited = true;
-		schedule();
-	}
-
-	finish_swait(&vcpu->wq, &wait);
+	rcuwait_wait_event(&vcpu->wait,
+			   (block_check = kvm_vcpu_check_block(vcpu)) < 0,
+			   TASK_IDLE);
 	cur = ktime_get();
 out:
 	kvm_arch_vcpu_unblocking(vcpu);
@@ -2525,18 +2516,17 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
+	trace_kvm_vcpu_wakeup(block_ns, !block_check, vcpu_valid_wakeup(vcpu));
 	kvm_arch_vcpu_block_finish(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 {
-	struct swait_queue_head *wqp;
+	struct rcuwait *wait;
 
-	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (swq_has_sleeper(wqp)) {
-		swake_up_one(wqp);
+	wait = kvm_arch_vcpu_get_wait(vcpu);
+	if (rcuwait_wake_up(wait)) {
 		WRITE_ONCE(vcpu->ready, true);
 		++vcpu->stat.halt_wakeup;
 		return true;
@@ -2678,7 +2668,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			if (vcpu == me)
 				continue;
-			if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+			if (rcu_dereference(vcpu->wait.task) &&
+			    !vcpu_dy_runnable(vcpu))
 				continue;
 			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
 				!kvm_arch_vcpu_in_kernel(vcpu))
-- 
2.16.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-20 16:41   ` [PATCH v2] " Davidlohr Bueso
@ 2020-04-20 17:12     ` Marc Zyngier
  2020-04-20 17:25       ` Paolo Bonzini
  2020-04-20 20:56       ` Davidlohr Bueso
  0 siblings, 2 replies; 15+ messages in thread
From: Marc Zyngier @ 2020-04-20 17:12 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: tglx, pbonzini, kvm, Davidlohr Bueso, peterz, torvalds, bigeasy,
	linux-kernel, rostedt, linux-mips, Paul Mackerras, joel, will,
	kvmarm

On 2020-04-20 17:41, Davidlohr Bueso wrote:
> The use of any sort of waitqueue (simple or regular) for
> wait/waking vcpus has always been an overkill and semantically
> wrong. Because this is per-vcpu (which is blocked) there is
> only ever a single waiting vcpu, thus no need for any sort of
> queue.
> 
> As such, make use of the rcuwait primitive, with the following
> considerations:
> 
>  - rcuwait already provides the proper barriers that serialize
>  concurrent waiter and waker.
> 
>  - Task wakeup is done in rcu read critical region, with a
>  stable task pointer.
> 
>  - Because there is no concurrency among waiters, we need
>  not worry about rcuwait_wait_event() calls corrupting
>  the wait->task. As a consequence, this saves the locking
>  done in swait when modifying the queue. This also applies
>  to per-vcore wait for powerpc kvm-hv.
> 
> The x86-tscdeadline_latency test mentioned in 8577370fb0cb
> ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
> latency is reduced by around 15-20% with this change.
> 
> This patch also changes TASK_INTERRUPTIBLE for TASK_IDLE, as
> kvm is (ab)using the former such that idle vcpus do no contribute
> to the loadavg. Let use the correct semantics for this.
> 
> Cc: Paul Mackerras <paulus@ozlabs.org>
> Cc: kvmarm@lists.cs.columbia.edu
> Cc: linux-mips@vger.kernel.org
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
> v2: Added missing semicolon in mips change.
> 
> The rest of the patches in this series continues to apply on tip,
> as such I am only sending a v2 for this particular patch.
> 
> arch/mips/kvm/mips.c                  |  6 ++----
> arch/powerpc/include/asm/kvm_book3s.h |  2 +-
> arch/powerpc/include/asm/kvm_host.h   |  2 +-
> arch/powerpc/kvm/book3s_hv.c          | 22 ++++++++--------------
> arch/powerpc/kvm/powerpc.c            |  2 +-
> arch/x86/kvm/lapic.c                  |  2 +-
> include/linux/kvm_host.h              | 10 +++++-----
> virt/kvm/arm/arch_timer.c             |  2 +-
> virt/kvm/arm/arm.c                    |  9 +++++----
> virt/kvm/async_pf.c                   |  3 +--
> virt/kvm/kvm_main.c                   | 31 +++++++++++--------------------
> 11 files changed, 37 insertions(+), 54 deletions(-)

[...]

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 70f03ce0e5c1..887efb39fb1a 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -343,7 +343,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
> 	vcpu->kvm = kvm;
> 	vcpu->vcpu_id = id;
> 	vcpu->pid = NULL;
> -	init_swait_queue_head(&vcpu->wq);
> +	rcuwait_init(&vcpu->wait);
> 	kvm_async_pf_vcpu_init(vcpu);
> 
> 	vcpu->pre_pcpu = -1;
> @@ -2465,9 +2465,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
> void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> {
> 	ktime_t start, cur;
> -	DECLARE_SWAITQUEUE(wait);
> -	bool waited = false;
> 	u64 block_ns;
> +	int block_check = -EINTR;
> 
> 	kvm_arch_vcpu_blocking(vcpu);
> 
> @@ -2491,17 +2490,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> 		} while (single_task_running() && ktime_before(cur, stop));
> 	}
> 
> -	for (;;) {
> -		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
> -
> -		if (kvm_vcpu_check_block(vcpu) < 0)
> -			break;
> -
> -		waited = true;
> -		schedule();
> -	}
> -
> -	finish_swait(&vcpu->wq, &wait);
> +	rcuwait_wait_event(&vcpu->wait,
> +			   (block_check = kvm_vcpu_check_block(vcpu)) < 0,
> +			   TASK_IDLE);
> 	cur = ktime_get();
> out:
> 	kvm_arch_vcpu_unblocking(vcpu);
> @@ -2525,18 +2516,17 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> 		}
> 	}
> 
> -	trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
> +	trace_kvm_vcpu_wakeup(block_ns, !block_check, vcpu_valid_wakeup(vcpu));

This looks like a change in the semantics of the tracepoint. Before this
change, 'waited' would have been true if the vcpu waited at all. Here, you'd
have false if it has been interrupted by a signal, even if the vcpu has
waited for a period of time.

Thanks,

         M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-20 17:12     ` Marc Zyngier
@ 2020-04-20 17:25       ` Paolo Bonzini
  2020-04-20 20:56       ` Davidlohr Bueso
  1 sibling, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2020-04-20 17:25 UTC (permalink / raw)
  To: Marc Zyngier, Davidlohr Bueso
  Cc: tglx, kvm, Davidlohr Bueso, peterz, torvalds, bigeasy,
	linux-kernel, rostedt, linux-mips, Paul Mackerras, joel, will,
	kvmarm

On 20/04/20 19:12, Marc Zyngier wrote:
>>
>>
>> -    trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
>> +    trace_kvm_vcpu_wakeup(block_ns, !block_check, vcpu_valid_wakeup(vcpu));
> 
> This looks like a change in the semantics of the tracepoint. Before
> this change, 'waited' would have been true if the vcpu waited at all.
> Here, you'd have false if it has been interrupted by a signal, even
> if the vcpu has waited for a period of time.
True, good catch.  Perhaps add macros prepare_to_rcuwait and
finish_rcuwait?
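
Something along these lines, perhaps (an untested sketch just to show
the shape, mirroring what rcuwait_wait_event() already does internally):

static inline void prepare_to_rcuwait(struct rcuwait *w)
{
	rcu_assign_pointer(w->task, current);
}

static inline void finish_rcuwait(struct rcuwait *w)
{
	rcu_assign_pointer(w->task, NULL);
	__set_current_state(TASK_RUNNING);
}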

Paolo


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-20 17:12     ` Marc Zyngier
  2020-04-20 17:25       ` Paolo Bonzini
@ 2020-04-20 20:56       ` Davidlohr Bueso
  2020-04-20 21:04         ` Paolo Bonzini
  1 sibling, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2020-04-20 20:56 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: tglx, pbonzini, kvm, Davidlohr Bueso, peterz, torvalds, bigeasy,
	linux-kernel, rostedt, linux-mips, Paul Mackerras, joel, will,
	kvmarm

On Mon, 20 Apr 2020, Marc Zyngier wrote:

>This looks like a change in the semantics of the tracepoint. Before this
>change, 'waited' would have been true if the vcpu waited at all. Here, you'd
>have false if it has been interrupted by a signal, even if the vcpu has
>waited for a period of time.

Hmm but sleeps are now uninterruptible as we're using TASK_IDLE.
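
(For reference, TASK_IDLE is just uninterruptible sleep that does not
count towards loadavg:

	#define TASK_IDLE	(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

so a pending signal no longer wakes the sleeper on its own.)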

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-20 20:56       ` Davidlohr Bueso
@ 2020-04-20 21:04         ` Paolo Bonzini
  2020-04-20 21:50           ` Davidlohr Bueso
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2020-04-20 21:04 UTC (permalink / raw)
  To: Davidlohr Bueso, Marc Zyngier
  Cc: tglx, kvm, Davidlohr Bueso, peterz, torvalds, bigeasy,
	linux-kernel, rostedt, linux-mips, Paul Mackerras, joel, will,
	kvmarm

On 20/04/20 22:56, Davidlohr Bueso wrote:
> On Mon, 20 Apr 2020, Marc Zyngier wrote:
> 
>> This looks like a change in the semantics of the tracepoint. Before this
>> change, 'waited' would have been true if the vcpu waited at all. Here,
>> you'd
>> have false if it has been interrupted by a signal, even if the vcpu
>> has waited
>> for a period of time.
> 
> Hmm but sleeps are now uninterruptible as we're using TASK_IDLE.

Hold on, does that mean that you can't anymore send a signal in order to
kick a thread out of KVM_RUN?  Or am I just misunderstanding?

Paolo


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-20 21:04         ` Paolo Bonzini
@ 2020-04-20 21:50           ` Davidlohr Bueso
  2020-04-21  9:43             ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2020-04-20 21:50 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marc Zyngier, tglx, kvm, Davidlohr Bueso, peterz, torvalds,
	bigeasy, linux-kernel, rostedt, linux-mips, Paul Mackerras, joel,
	will, kvmarm

On Mon, 20 Apr 2020, Paolo Bonzini wrote:

>On 20/04/20 22:56, Davidlohr Bueso wrote:
>> On Mon, 20 Apr 2020, Marc Zyngier wrote:
>>
>>> This looks like a change in the semantics of the tracepoint. Before this
>>> change, 'waited' would have been true if the vcpu waited at all. Here,
>>> you'd
>>> have false if it has been interrupted by a signal, even if the vcpu
>>> has waited
>>> for a period of time.
>>
>> Hmm but sleeps are now uninterruptible as we're using TASK_IDLE.
>
>Hold on, does that mean that you can't anymore send a signal in order to
>kick a thread out of KVM_RUN?  Or am I just misunderstanding?

Considering that the return value of the interruptible wait is not
checked, I would not think this breaks KVM_RUN.

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-20 21:50           ` Davidlohr Bueso
@ 2020-04-21  9:43             ` Paolo Bonzini
  2020-04-21 18:07               ` Davidlohr Bueso
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2020-04-21  9:43 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Marc Zyngier, tglx, kvm, Davidlohr Bueso, peterz, torvalds,
	bigeasy, linux-kernel, rostedt, linux-mips, Paul Mackerras, joel,
	will, kvmarm

On 20/04/20 23:50, Davidlohr Bueso wrote:
> On Mon, 20 Apr 2020, Paolo Bonzini wrote:
> 
>> On 20/04/20 22:56, Davidlohr Bueso wrote:
>>> On Mon, 20 Apr 2020, Marc Zyngier wrote:
>>>
>>>> This looks like a change in the semantics of the tracepoint. Before
>>>> this
>>>> change, 'waited' would have been true if the vcpu waited at all. Here,
>>>> you'd
>>>> have false if it has been interrupted by a signal, even if the vcpu
>>>> has waited
>>>> for a period of time.
>>>
>>> Hmm but sleeps are now uninterruptible as we're using TASK_IDLE.
>>
>> Hold on, does that mean that you can't anymore send a signal in order to
>> kick a thread out of KVM_RUN?  Or am I just misunderstanding?
> 
> Considering that the return value of the interruptible wait is not
> checked, I would not think this breaks KVM_RUN.

What return value?  kvm_vcpu_check_block checks signal_pending, so you
could have a case where the signal is injected but you're not woken up.
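
For reference, kvm_vcpu_check_block() boils down to roughly the
following (paraphrased, details trimmed):

static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
	if (kvm_arch_vcpu_runnable(vcpu)) {
		kvm_make_request(KVM_REQ_UNHALT, vcpu);
		return -EINTR;
	}
	if (kvm_cpu_has_pending_timer(vcpu))
		return -EINTR;
	if (signal_pending(current))
		return -EINTR;

	return 0;
}

so the condition does observe the pending signal, but with TASK_IDLE
nothing wakes the task up to re-evaluate it.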

Admittedly I am not familiar with how TASK_* work under the hood, but it
does seem to be like that.

Paolo


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-21  9:43             ` Paolo Bonzini
@ 2020-04-21 18:07               ` Davidlohr Bueso
  2020-04-21 19:52                 ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Davidlohr Bueso @ 2020-04-21 18:07 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marc Zyngier, tglx, kvm, Davidlohr Bueso, peterz, torvalds,
	bigeasy, linux-kernel, rostedt, linux-mips, Paul Mackerras, joel,
	will, kvmarm

On Tue, 21 Apr 2020, Paolo Bonzini wrote:

>On 20/04/20 23:50, Davidlohr Bueso wrote:
>> On Mon, 20 Apr 2020, Paolo Bonzini wrote:
>>
>>> On 20/04/20 22:56, Davidlohr Bueso wrote:
>>>> On Mon, 20 Apr 2020, Marc Zyngier wrote:
>>>>
>>>>> This looks like a change in the semantics of the tracepoint. Before
>>>>> this
>>>>> change, 'waited' would have been true if the vcpu waited at all. Here,
>>>>> you'd
>>>>> have false if it has been interrupted by a signal, even if the vcpu
>>>>> has waited
>>>>> for a period of time.
>>>>
>>>> Hmm but sleeps are now uninterruptible as we're using TASK_IDLE.
>>>
>>> Hold on, does that mean that you can't anymore send a signal in order to
>>> kick a thread out of KVM_RUN?  Or am I just misunderstanding?
>>
>> Considering that the return value of the interruptible wait is not
>> checked, I would not think this breaks KVM_RUN.
>
>What return value?  kvm_vcpu_check_block checks signal_pending, so you
>could have a case where the signal is injected but you're not woken up.
>
>Admittedly I am not familiar with how TASK_* work under the hood, but it
>does seem to be like that.

I should have looked closer here - I was thinking about the return value
of rcuwait_wait_event(). Yes, that signal_pending check you mention makes
the sleep semantics change bogus, as the interruptible sleep is not there
just to avoid contributing to the loadavg.

And yes, unfortunately adding prepare_to and finish_rcuwait() looks like the
most reasonable approach to keeping the tracepoint semantics. I also considered
extending rcuwait_wait_event() by another parameter to pass back to the caller
if there was any wait at all, but that enlarges the call and is probably less
generic.
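
Roughly, keeping 'waited' would mean going back to an explicit loop
around such helpers; a quick sketch of the idea (untested):

	prepare_to_rcuwait(&vcpu->wait);
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (kvm_vcpu_check_block(vcpu) < 0)
			break;

		waited = true;
		schedule();
	}
	finish_rcuwait(&vcpu->wait);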

I'll send another version keeping the current sleep and tracepoint semantics.

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] kvm: Replace vcpu->swait with rcuwait
  2020-04-21 18:07               ` Davidlohr Bueso
@ 2020-04-21 19:52                 ` Paolo Bonzini
  0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2020-04-21 19:52 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Marc Zyngier, tglx, kvm, Davidlohr Bueso, peterz, torvalds,
	bigeasy, linux-kernel, rostedt, linux-mips, Paul Mackerras, joel,
	will, kvmarm

On 21/04/20 20:07, Davidlohr Bueso wrote:
>> 
> 
> I should have looked closer here - I was thinking about the return
> value of rcuwait_wait_event(). Yes, that signal_pending check you
> mention makes the sleep semantics change bogus, as the interruptible
> sleep is not there just to avoid contributing to the loadavg.
> 
> And yes, unfortunately adding prepare_to and finish_rcuwait() looks
> like the most reasonable approach to keeping the tracepoint
> semantics. I also considered extending rcuwait_wait_event() by
> another parameter to pass back to the caller if there was any wait at
> all, but that enlarges the call and is probably less generic.

Yes, at some point the usual prepare_to/finish APIs become simpler.

> I'll send another version keeping the current sleep and tracepoint 
> semantics.

Thanks---and sorry, I should have noticed that way earlier.

Paolo


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-04-21 19:53 UTC | newest]

Thread overview: 15+ messages
     [not found] <20200324044453.15733-1-dave@stgolabs.net>
2020-03-24  4:44 ` [PATCH 3/4] kvm: Replace vcpu->swait with rcuwait Davidlohr Bueso
2020-03-24 12:07   ` kbuild test robot
     [not found]   ` <202003250014.iSvLXrUS%lkp@intel.com>
2020-03-24 17:56     ` Davidlohr Bueso
2020-03-25 17:09   ` Paolo Bonzini
2020-04-14 21:12     ` Davidlohr Bueso
2020-04-15 12:11       ` Paolo Bonzini
2020-04-20 16:41   ` [PATCH v2] " Davidlohr Bueso
2020-04-20 17:12     ` Marc Zyngier
2020-04-20 17:25       ` Paolo Bonzini
2020-04-20 20:56       ` Davidlohr Bueso
2020-04-20 21:04         ` Paolo Bonzini
2020-04-20 21:50           ` Davidlohr Bueso
2020-04-21  9:43             ` Paolo Bonzini
2020-04-21 18:07               ` Davidlohr Bueso
2020-04-21 19:52                 ` Paolo Bonzini
