From: Marcelo Tosatti <mtosatti@redhat.com>
To: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>,
	linux-doc@vger.kernel.org, Peter Zijlstra <peterz@infradead.org>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	Virtualization <virtualization@lists.linux-foundation.org>,
	Paul Mackerras <paulus@samba.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Stefano Stabellini <stefano.stabellini@eu.citrix.com>,
	Xen <xen-devel@lists.xensource.com>,
	Dave Jiang <dave.jiang@intel.com>, KVM <kvm@vger.kernel.org>,
	Glauber Costa <glommer@redhat.com>, X86 <x86@kernel.org>,
	Ingo Molnar <mingo@redhat.com>, Avi Kivity <avi@redhat.com>,
	Rik van Riel <riel@redhat.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>,
	Sasha Levin <levinsasha928@gmail.com>,
	Sedat Dilek <sedat.dilek@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Greg Kroah-Hartman <gregkh@suse.de>,
	LKML <linux-kernel@vger.kernel.org>,
	Dave Hansen <dave@linux>
Subject: Re: [PATCH RFC V4 4/5] kvm : pv-ticketlocks support for linux guests running on KVM hypervisor
Date: Tue, 17 Jan 2012 09:02:11 -0200	[thread overview]
Message-ID: <20120117110210.GA17420@amt.cnet> (raw)
In-Reply-To: <20120114182645.8604.68884.sendpatchset@oc5400248562.ibm.com>

On Sat, Jan 14, 2012 at 11:56:46PM +0530, Raghavendra K T wrote:
> Extends a Linux guest running on the KVM hypervisor to support pv-ticketlocks.
> 
> During smp_boot_cpus, the paravirtualized KVM guest detects whether the
> hypervisor has the required feature (KVM_FEATURE_PVLOCK_KICK) to support
> pv-ticketlocks. If so, support for pv-ticketlocks is registered via pv_lock_ops.
> 
> Use the KVM_HC_KICK_CPU hypercall to wake up a waiting/halted vcpu.
> 
> Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Signed-off-by: Suzuki Poulose <suzuki@in.ibm.com>
> Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
> ---
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 7a94987..cf5327c 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -195,10 +195,20 @@ void kvm_async_pf_task_wait(u32 token);
>  void kvm_async_pf_task_wake(u32 token);
>  u32 kvm_read_and_reset_pf_reason(void);
>  extern void kvm_disable_steal_time(void);
> -#else
> -#define kvm_guest_init() do { } while (0)
> +
> +#ifdef CONFIG_PARAVIRT_SPINLOCKS
> +void __init kvm_spinlock_init(void);
> +#else /* CONFIG_PARAVIRT_SPINLOCKS */
> +static inline void kvm_spinlock_init(void)
> +{
> +}
> +#endif /* CONFIG_PARAVIRT_SPINLOCKS */
> +
> +#else /* CONFIG_KVM_GUEST */
> +#define kvm_guest_init() do {} while (0)
>  #define kvm_async_pf_task_wait(T) do {} while(0)
>  #define kvm_async_pf_task_wake(T) do {} while(0)
> +
>  static inline u32 kvm_read_and_reset_pf_reason(void)
>  {
>  	return 0;
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index a9c2116..ec55a0b 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -33,6 +33,7 @@
>  #include <linux/sched.h>
>  #include <linux/slab.h>
>  #include <linux/kprobes.h>
> +#include <linux/debugfs.h>
>  #include <asm/timer.h>
>  #include <asm/cpu.h>
>  #include <asm/traps.h>
> @@ -545,6 +546,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
>  #endif
>  	kvm_guest_cpu_init();
>  	native_smp_prepare_boot_cpu();
> +	kvm_spinlock_init();
>  }
>  
>  static void __cpuinit kvm_guest_cpu_online(void *dummy)
> @@ -627,3 +629,250 @@ static __init int activate_jump_labels(void)
>  	return 0;
>  }
>  arch_initcall(activate_jump_labels);
> +
> +#ifdef CONFIG_PARAVIRT_SPINLOCKS
> +
> +enum kvm_contention_stat {
> +	TAKEN_SLOW,
> +	TAKEN_SLOW_PICKUP,
> +	RELEASED_SLOW,
> +	RELEASED_SLOW_KICKED,
> +	NR_CONTENTION_STATS
> +};
> +
> +#ifdef CONFIG_KVM_DEBUG_FS
> +
> +static struct kvm_spinlock_stats
> +{
> +	u32 contention_stats[NR_CONTENTION_STATS];
> +
> +#define HISTO_BUCKETS	30
> +	u32 histo_spin_blocked[HISTO_BUCKETS+1];
> +
> +	u64 time_blocked;
> +} spinlock_stats;
> +
> +static u8 zero_stats;
> +
> +static inline void check_zero(void)
> +{
> +	u8 ret;
> +	u8 old = ACCESS_ONCE(zero_stats);
> +	if (unlikely(old)) {
> +		ret = cmpxchg(&zero_stats, old, 0);
> +		/* This ensures that only one cpu resets the stats */
> +		if (ret == old)
> +			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
> +	}
> +}
> +
> +static inline void add_stats(enum kvm_contention_stat var, u32 val)
> +{
> +	check_zero();
> +	spinlock_stats.contention_stats[var] += val;
> +}
> +
> +
> +static inline u64 spin_time_start(void)
> +{
> +	return sched_clock();
> +}
> +
> +static void __spin_time_accum(u64 delta, u32 *array)
> +{
> +	unsigned index = ilog2(delta);
> +
> +	check_zero();
> +
> +	if (index < HISTO_BUCKETS)
> +		array[index]++;
> +	else
> +		array[HISTO_BUCKETS]++;
> +}
> +
> +static inline void spin_time_accum_blocked(u64 start)
> +{
> +	u32 delta = sched_clock() - start;
> +
> +	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
> +	spinlock_stats.time_blocked += delta;
> +}
> +
> +static struct dentry *d_spin_debug;
> +static struct dentry *d_kvm_debug;
> +
> +struct dentry *kvm_init_debugfs(void)
> +{
> +	d_kvm_debug = debugfs_create_dir("kvm", NULL);
> +	if (!d_kvm_debug)
> +		printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
> +
> +	return d_kvm_debug;
> +}
> +
> +static int __init kvm_spinlock_debugfs(void)
> +{
> +	struct dentry *d_kvm = kvm_init_debugfs();
> +
> +	if (d_kvm == NULL)
> +		return -ENOMEM;
> +
> +	d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
> +
> +	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
> +
> +	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
> +		   &spinlock_stats.contention_stats[TAKEN_SLOW]);
> +	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
> +		   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
> +
> +	debugfs_create_u32("released_slow", 0444, d_spin_debug,
> +		   &spinlock_stats.contention_stats[RELEASED_SLOW]);
> +	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
> +		   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
> +
> +	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
> +			   &spinlock_stats.time_blocked);
> +
> +	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
> +		     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
> +
> +	return 0;
> +}
> +fs_initcall(kvm_spinlock_debugfs);
> +#else  /* !CONFIG_KVM_DEBUG_FS */
> +#define TIMEOUT			(1 << 10)
> +static inline void add_stats(enum kvm_contention_stat var, u32 val)
> +{
> +}
> +
> +static inline u64 spin_time_start(void)
> +{
> +	return 0;
> +}
> +
> +static inline void spin_time_accum_blocked(u64 start)
> +{
> +}
> +#endif  /* CONFIG_KVM_DEBUG_FS */
> +
> +struct kvm_lock_waiting {
> +	struct arch_spinlock *lock;
> +	__ticket_t want;
> +};
> +
> +/* cpus 'waiting' on a spinlock to become available */
> +static cpumask_t waiting_cpus;
> +
> +/* Track spinlock on which a cpu is waiting */
> +static DEFINE_PER_CPU(struct kvm_lock_waiting, lock_waiting);
> +
> +static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
> +{
> +	struct kvm_lock_waiting *w = &__get_cpu_var(lock_waiting);
> +	int cpu = smp_processor_id();
> +	u64 start;
> +	unsigned long flags;
> +
> +	start = spin_time_start();
> +
> +	/*
> +	 * Make sure an interrupt handler can't upset things in a
> +	 * partially setup state.
> +	 */
> +	local_irq_save(flags);
> +
> +	/*
> +	 * The ordering protocol on this is that the "lock" pointer
> +	 * may only be set non-NULL if the "want" ticket is correct.
> +	 * If we're updating "want", we must first clear "lock".
> +	 */
> +	w->lock = NULL;
> +	smp_wmb();
> +	w->want = want;
> +	smp_wmb();
> +	w->lock = lock;
> +
> +	add_stats(TAKEN_SLOW, 1);
> +
> +	/*
> +	 * This uses set_bit, which is atomic but we should not rely on its
> +	 * reordering guarantees, so a barrier is needed after this call.
> +	 */
> +	cpumask_set_cpu(cpu, &waiting_cpus);
> +
> +	barrier();
> +
> +	/*
> +	 * Mark entry to slowpath before doing the pickup test to make
> +	 * sure we don't deadlock with an unlocker.
> +	 */
> +	__ticket_enter_slowpath(lock);
> +
> +	/*
> +	 * Check again to make sure it didn't become free while
> +	 * we weren't looking.
> +	 */
> +	if (ACCESS_ONCE(lock->tickets.head) == want) {
> +		add_stats(TAKEN_SLOW_PICKUP, 1);
> +		goto out;
> +	}
> +
> +	/* Allow interrupts while blocked */
> +	local_irq_restore(flags);
> +
> +	/* Halt until it's our turn and we've been kicked. */
> +	halt();
> +
> +	local_irq_save(flags);
> +out:
> +	cpumask_clear_cpu(cpu, &waiting_cpus);
> +	w->lock = NULL;
> +	local_irq_restore(flags);
> +	spin_time_accum_blocked(start);
> +}
> +PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
> +
> +/* Kick a cpu by its apicid */
> +static inline void kvm_kick_cpu(int apicid)
> +{
> +	kvm_hypercall1(KVM_HC_KICK_CPU, apicid);
> +}
> +
> +/* Kick vcpu waiting on @lock->head to reach value @ticket */
> +static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
> +{
> +	int cpu;
> +	int apicid;
> +
> +	add_stats(RELEASED_SLOW, 1);
> +
> +	for_each_cpu(cpu, &waiting_cpus) {
> +		const struct kvm_lock_waiting *w = &per_cpu(lock_waiting, cpu);
> +		if (ACCESS_ONCE(w->lock) == lock &&
> +		    ACCESS_ONCE(w->want) == ticket) {
> +			add_stats(RELEASED_SLOW_KICKED, 1);
> +			apicid = per_cpu(x86_cpu_to_apicid, cpu);
> +			kvm_kick_cpu(apicid);
> +			break;
> +		}
> +	}

What prevents a kick from being lost here, if say, the waiter is at
local_irq_save in kvm_lock_spinning, before the lock/want assignments?
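
For illustration, the window being asked about, as a rough interleaving
(a sketch of the timing only, not a claim about the outcome):

    waiter (kvm_lock_spinning)        unlocker (kvm_unlock_kick)
    --------------------------        --------------------------
    local_irq_save(flags);
                                      add_stats(RELEASED_SLOW, 1);
                                      for_each_cpu(cpu, &waiting_cpus)
                                          /* this cpu not yet in the mask,
                                             w->lock/w->want still stale:
                                             no kick is sent */
    w->want = want;
    w->lock = lock;
    cpumask_set_cpu(cpu, &waiting_cpus);
    __ticket_enter_slowpath(lock);
    /* pickup test */
    if (ACCESS_ONCE(lock->tickets.head) == want)
        ...
    halt();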

> +}
> +
> +/*
> + * Setup pv_lock_ops to exploit KVM_FEATURE_PVLOCK_KICK if present.
> + */
> +void __init kvm_spinlock_init(void)
> +{
> +	if (!kvm_para_available())
> +		return;
> +	/* Does host kernel support KVM_FEATURE_PVLOCK_KICK? */
> +	if (!kvm_para_has_feature(KVM_FEATURE_PVLOCK_KICK))
> +		return;
> +
> +	jump_label_inc(&paravirt_ticketlocks_enabled);
> +
> +	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
> +	pv_lock_ops.unlock_kick = kvm_unlock_kick;
> +}
> +#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c7b05fc..4d7a950 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5754,8 +5754,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  
>  	local_irq_disable();
>  
> -	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
> -	    || need_resched() || signal_pending(current)) {
> +	if (vcpu->mode == EXITING_GUEST_MODE
> +		 || (vcpu->requests & ~(1UL<<KVM_REQ_PVLOCK_KICK))
> +		 || need_resched() || signal_pending(current)) {
>  		vcpu->mode = OUTSIDE_GUEST_MODE;
>  		smp_wmb();
>  		local_irq_enable();
> @@ -6711,6 +6712,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  		!vcpu->arch.apf.halted)
>  		|| !list_empty_careful(&vcpu->async_pf.done)
>  		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
> +		|| kvm_check_request(KVM_REQ_PVLOCK_KICK, vcpu)

The bit should only be read here (kvm_arch_vcpu_runnable), but cleared
on vcpu entry (along with the other kvm_check_request processing).

Then the first hunk becomes unnecessary.
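
Something along these lines, as a sketch only (exact placement among the
other request processing is yours to decide):

	/* kvm_arch_vcpu_runnable(): look at the bit without consuming it */
	...
	|| test_bit(KVM_REQ_PVLOCK_KICK, &vcpu->requests)

	/*
	 * vcpu_enter_guest(), together with the other kvm_check_request()
	 * calls: consume the kick here; its only effect was to make the
	 * vcpu runnable, so there is nothing else to do.
	 */
	kvm_check_request(KVM_REQ_PVLOCK_KICK, vcpu);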

Please do not mix host/guest patches.
