* [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Anoob Soman @ 2017-06-02 14:49 UTC (permalink / raw)
  To: xen-devel, linux-kernel; +Cc: boris.ostrovsky, jgross, Anoob Soman

An HVM domain booting generates around 200K (evtchn:qemu-dm xen-dyn)
interrupts in a short period of time. All these evtchn:qemu-dm interrupts
are bound to VCPU 0 until irqbalance sees these IRQs and moves them to a
different VCPU. In one configuration, irqbalance runs every 10 seconds,
which means irqbalance doesn't get to see this burst of interrupts and
doesn't re-balance interrupts most of the time, leaving all evtchn:qemu-dm
interrupts to be processed by VCPU0. This causes VCPU0 to spend most of its
time processing hardirq and very little time on softirq. Moreover, if the
dom0 kernel has PREEMPTION disabled, VCPU0 never runs the watchdog (process
context), triggering the softlockup detection code to panic.

Binding evtchn:qemu-dm to the next online VCPU spreads hardirq processing
evenly across different CPUs. Later, irqbalance will try to balance
evtchn:qemu-dm, if required.

Signed-off-by: Anoob Soman <anoob.soman@citrix.com>
---
 drivers/xen/events/events_base.c |  9 +++++++--
 drivers/xen/evtchn.c             | 36 +++++++++++++++++++++++++++++++++++-
 include/xen/events.h             |  1 +
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b52852f..8224ec1 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1303,10 +1303,9 @@ void rebind_evtchn_irq(int evtchn, int irq)
 }
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
-static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu)
 {
 	struct evtchn_bind_vcpu bind_vcpu;
-	int evtchn = evtchn_from_irq(irq);
 	int masked;
 
 	if (!VALID_EVTCHN(evtchn))
@@ -1338,6 +1337,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xen_rebind_evtchn_to_cpu);
+
+static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+{
+	return xen_rebind_evtchn_to_cpu(evtchn_from_irq(irq), tcpu);
+}
 
 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 			    bool force)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 10f1ef5..1192f24 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -58,6 +58,8 @@
 #include <xen/xen-ops.h>
 #include <asm/xen/hypervisor.h>
 
+static DEFINE_PER_CPU(int, bind_last_selected_cpu);
+
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
 	struct rb_root evtchns;
@@ -421,6 +423,36 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
 	del_evtchn(u, evtchn);
 }
 
+static void evtchn_bind_interdom_next_vcpu(int evtchn)
+{
+	unsigned int selected_cpu, irq;
+	struct irq_desc *desc = NULL;
+	unsigned long flags;
+
+	irq = irq_from_evtchn(evtchn);
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	selected_cpu = this_cpu_read(bind_last_selected_cpu);
+	selected_cpu = cpumask_next_and(selected_cpu,
+			desc->irq_common_data.affinity, cpu_online_mask);
+
+	if (unlikely(selected_cpu >= nr_cpu_ids))
+		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
+				cpu_online_mask);
+
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+	this_cpu_write(bind_last_selected_cpu, selected_cpu);
+
+	local_irq_disable();
+	/* unmask expects irqs to be disabled */
+	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
+	local_irq_enable();
+}
+
 static long evtchn_ioctl(struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -478,8 +510,10 @@ static long evtchn_ioctl(struct file *file,
 			break;
 
 		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
-		if (rc == 0)
+		if (rc == 0) {
 			rc = bind_interdomain.local_port;
+			evtchn_bind_interdom_next_vcpu(rc);
+		}
 		break;
 	}
 
diff --git a/include/xen/events.h b/include/xen/events.h
index 88da2ab..f442ca5 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -58,6 +58,7 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
 void rebind_evtchn_irq(int evtchn, int irq);
+int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu);
 
 static inline void notify_remote_via_evtchn(int port)
 {
-- 
1.8.3.1


* Re: [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Juergen Gross @ 2017-06-02 15:10 UTC (permalink / raw)
  To: Anoob Soman, xen-devel, linux-kernel; +Cc: boris.ostrovsky

On 02/06/17 16:49, Anoob Soman wrote:
> An HVM domain booting generates around 200K (evtchn:qemu-dm xen-dyn)
> interrupts in a short period of time. All these evtchn:qemu-dm interrupts
> are bound to VCPU 0 until irqbalance sees these IRQs and moves them to a
> different VCPU. In one configuration, irqbalance runs every 10 seconds,
> which means irqbalance doesn't get to see this burst of interrupts and
> doesn't re-balance interrupts most of the time, leaving all evtchn:qemu-dm
> interrupts to be processed by VCPU0. This causes VCPU0 to spend most of its
> time processing hardirq and very little time on softirq. Moreover, if the
> dom0 kernel has PREEMPTION disabled, VCPU0 never runs the watchdog (process
> context), triggering the softlockup detection code to panic.
> 
> Binding evtchn:qemu-dm to the next online VCPU spreads hardirq processing
> evenly across different CPUs. Later, irqbalance will try to balance
> evtchn:qemu-dm, if required.
> 
> Signed-off-by: Anoob Soman <anoob.soman@citrix.com>
> ---
>  drivers/xen/events/events_base.c |  9 +++++++--
>  drivers/xen/evtchn.c             | 36 +++++++++++++++++++++++++++++++++++-
>  include/xen/events.h             |  1 +
>  3 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index b52852f..8224ec1 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -1303,10 +1303,9 @@ void rebind_evtchn_irq(int evtchn, int irq)
>  }
>  
>  /* Rebind an evtchn so that it gets delivered to a specific cpu */
> -static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
> +int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu)
>  {
>  	struct evtchn_bind_vcpu bind_vcpu;
> -	int evtchn = evtchn_from_irq(irq);
>  	int masked;
>  
>  	if (!VALID_EVTCHN(evtchn))
> @@ -1338,6 +1337,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
>  
>  	return 0;
>  }
> +EXPORT_SYMBOL_GPL(xen_rebind_evtchn_to_cpu);
> +
> +static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
> +{
> +	return xen_rebind_evtchn_to_cpu(evtchn_from_irq(irq), tcpu);
> +}
>  
>  static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
>  			    bool force)
> diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
> index 10f1ef5..1192f24 100644
> --- a/drivers/xen/evtchn.c
> +++ b/drivers/xen/evtchn.c
> @@ -58,6 +58,8 @@
>  #include <xen/xen-ops.h>
>  #include <asm/xen/hypervisor.h>
>  
> +static DEFINE_PER_CPU(int, bind_last_selected_cpu);
> +
>  struct per_user_data {
>  	struct mutex bind_mutex; /* serialize bind/unbind operations */
>  	struct rb_root evtchns;
> @@ -421,6 +423,36 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
>  	del_evtchn(u, evtchn);
>  }
>  
> +static void evtchn_bind_interdom_next_vcpu(int evtchn)
> +{
> +	unsigned int selected_cpu, irq;
> +	struct irq_desc *desc = NULL;
> +	unsigned long flags;
> +
> +	irq = irq_from_evtchn(evtchn);
> +	desc = irq_to_desc(irq);
> +
> +	if (!desc)
> +		return;
> +
> +	raw_spin_lock_irqsave(&desc->lock, flags);
> +	selected_cpu = this_cpu_read(bind_last_selected_cpu);
> +	selected_cpu = cpumask_next_and(selected_cpu,
> +			desc->irq_common_data.affinity, cpu_online_mask);
> +
> +	if (unlikely(selected_cpu >= nr_cpu_ids))
> +		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
> +				cpu_online_mask);
> +
> +	raw_spin_unlock_irqrestore(&desc->lock, flags);
> +	this_cpu_write(bind_last_selected_cpu, selected_cpu);
> +
> +	local_irq_disable();
> +	/* unmask expects irqs to be disabled */
> +	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
> +	local_irq_enable();

I'd prefer to have irqs disabled from taking the lock until here.
This will avoid problems due to preemption and will be faster, as it
avoids one irq on/off cycle. So:

local_irq_disable();
raw_spin_lock();
...
raw_spin_unlock();
this_cpu_write();
xen_rebind_evtchn_to_cpu();
local_irq_enable();


Juergen
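
(For illustration, a minimal sketch of evtchn_bind_interdom_next_vcpu()
with this reordering applied; this is one reading of the suggestion, not
the code that was actually submitted:)

static void evtchn_bind_interdom_next_vcpu(int evtchn)
{
	unsigned int selected_cpu, irq;
	struct irq_desc *desc;

	irq = irq_from_evtchn(evtchn);
	desc = irq_to_desc(irq);

	if (!desc)
		return;

	/*
	 * irqs stay off from taking the lock until after the rebind:
	 * only one irq off/on cycle is paid, and the task cannot be
	 * preempted between selecting the cpu and rebinding.
	 */
	local_irq_disable();
	raw_spin_lock(&desc->lock);
	selected_cpu = this_cpu_read(bind_last_selected_cpu);
	selected_cpu = cpumask_next_and(selected_cpu,
			desc->irq_common_data.affinity, cpu_online_mask);
	if (unlikely(selected_cpu >= nr_cpu_ids))
		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
				cpu_online_mask);
	raw_spin_unlock(&desc->lock);
	this_cpu_write(bind_last_selected_cpu, selected_cpu);
	/* unmask expects irqs to be disabled */
	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
	local_irq_enable();
}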


* Re: [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Boris Ostrovsky @ 2017-06-02 16:24 UTC (permalink / raw)
  To: Anoob Soman, xen-devel, linux-kernel; +Cc: jgross


>  
>  static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
>  			    bool force)
> diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
> index 10f1ef5..1192f24 100644
> --- a/drivers/xen/evtchn.c
> +++ b/drivers/xen/evtchn.c
> @@ -58,6 +58,8 @@
>  #include <xen/xen-ops.h>
>  #include <asm/xen/hypervisor.h>
>  
> +static DEFINE_PER_CPU(int, bind_last_selected_cpu);

This should be moved into evtchn_bind_interdom_next_vcpu() since that's
the only place referencing it.

Why is it a percpu variable BTW? Wouldn't making it global result in
better interrupt distribution?

> +
>  struct per_user_data {
>  	struct mutex bind_mutex; /* serialize bind/unbind operations */
>  	struct rb_root evtchns;
> @@ -421,6 +423,36 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
>  	del_evtchn(u, evtchn);
>  }
>  
> +static void evtchn_bind_interdom_next_vcpu(int evtchn)
> +{
> +	unsigned int selected_cpu, irq;
> +	struct irq_desc *desc = NULL;
> +	unsigned long flags;
> +
> +	irq = irq_from_evtchn(evtchn);
> +	desc = irq_to_desc(irq);
> +
> +	if (!desc)
> +		return;
> +
> +	raw_spin_lock_irqsave(&desc->lock, flags);
> +	selected_cpu = this_cpu_read(bind_last_selected_cpu);
> +	selected_cpu = cpumask_next_and(selected_cpu,
> +			desc->irq_common_data.affinity, cpu_online_mask);
> +
> +	if (unlikely(selected_cpu >= nr_cpu_ids))
> +		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
> +				cpu_online_mask);
> +
> +	raw_spin_unlock_irqrestore(&desc->lock, flags);

I think if you follow Juergen's suggestion of wrapping everything into
irq_enable/disable you can drop the lock altogether (assuming you keep
bind_last_selected_cpu percpu).

-boris


> +	this_cpu_write(bind_last_selected_cpu, selected_cpu);
> +
> +	local_irq_disable();
> +	/* unmask expects irqs to be disabled */
> +	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
> +	local_irq_enable();
> +}
> +
>


* Re: [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Anoob Soman @ 2017-06-05  9:54 UTC (permalink / raw)
  To: Juergen Gross, xen-devel, linux-kernel; +Cc: boris.ostrovsky

On 02/06/17 16:10, Juergen Gross wrote:
>
> I'd prefer to have irqs disabled from taking the lock until here.
> This will avoid problems due to preemption and will be faster, as it
> avoids one irq on/off cycle. So:
>
> local_irq_disable();
> raw_spin_lock();
> ...
> raw_spin_unlock();
> this_cpu_write();
> xen_rebind_evtchn_to_cpu();
> local_irq_enable();
>
>
> Juergen


Agreed, I will send a V2 with your suggestion.


-Anoob.


* Re: [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Anoob Soman @ 2017-06-05 10:14 UTC (permalink / raw)
  To: Boris Ostrovsky, xen-devel, linux-kernel; +Cc: jgross

On 02/06/17 17:24, Boris Ostrovsky wrote:
>>   
>>   static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
>>   			    bool force)
>> diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
>> index 10f1ef5..1192f24 100644
>> --- a/drivers/xen/evtchn.c
>> +++ b/drivers/xen/evtchn.c
>> @@ -58,6 +58,8 @@
>>   #include <xen/xen-ops.h>
>>   #include <asm/xen/hypervisor.h>
>>   
>> +static DEFINE_PER_CPU(int, bind_last_selected_cpu);
> This should be moved into evtchn_bind_interdom_next_vcpu() since that's
> the only place referencing it.

Sure, I will do it.

>
> Why is it a percpu variable BTW? Wouldn't making it global result in
> better interrupt distribution?

The reason for percpu instead of global was to avoid locking. We can
have a global variable (last_cpu) without locking, but the value of
last_cpu won't be consistent without locks. Moreover, since irq_affinity
is also used in the calculation of the cpu to bind, having a percpu or a
global wouldn't really matter, as the result (selected_cpu) is more likely
to be random (because different irqs can have different affinity). What do
you guys suggest?

>
>> +
>>   struct per_user_data {
>>   	struct mutex bind_mutex; /* serialize bind/unbind operations */
>>   	struct rb_root evtchns;
>> @@ -421,6 +423,36 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
>>   	del_evtchn(u, evtchn);
>>   }
>>   
>> +static void evtchn_bind_interdom_next_vcpu(int evtchn)
>> +{
>> +	unsigned int selected_cpu, irq;
>> +	struct irq_desc *desc = NULL;
>> +	unsigned long flags;
>> +
>> +	irq = irq_from_evtchn(evtchn);
>> +	desc = irq_to_desc(irq);
>> +
>> +	if (!desc)
>> +		return;
>> +
>> +	raw_spin_lock_irqsave(&desc->lock, flags);
>> +	selected_cpu = this_cpu_read(bind_last_selected_cpu);
>> +	selected_cpu = cpumask_next_and(selected_cpu,
>> +			desc->irq_common_data.affinity, cpu_online_mask);
>> +
>> +	if (unlikely(selected_cpu >= nr_cpu_ids))
>> +		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
>> +				cpu_online_mask);
>> +
>> +	raw_spin_unlock_irqrestore(&desc->lock, flags);
> I think if you follow Juergen's suggestion of wrapping everything into
> irq_enable/disable you can drop the lock altogether (assuming you keep
> bind_last_selected_cpu percpu).
>
> -boris
>

I think we would still require the spin_lock(); the spin_lock protects irq_desc.

>> +	this_cpu_write(bind_last_selected_cpu, selected_cpu);
>> +
>> +	local_irq_disable();
>> +	/* unmask expects irqs to be disabled */
>> +	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
>> +	local_irq_enable();
>> +}
>> +
>>


* Re: [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Boris Ostrovsky @ 2017-06-05 14:10 UTC (permalink / raw)
  To: Anoob Soman, xen-devel, linux-kernel; +Cc: jgross

On 06/05/2017 06:14 AM, Anoob Soman wrote:
> On 02/06/17 17:24, Boris Ostrovsky wrote:
>>>     static int set_affinity_irq(struct irq_data *data, const struct
>>> cpumask *dest,
>>>                   bool force)
>>> diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
>>> index 10f1ef5..1192f24 100644
>>> --- a/drivers/xen/evtchn.c
>>> +++ b/drivers/xen/evtchn.c
>>> @@ -58,6 +58,8 @@
>>>   #include <xen/xen-ops.h>
>>>   #include <asm/xen/hypervisor.h>
>>>   +static DEFINE_PER_CPU(int, bind_last_selected_cpu);
>> This should be moved into evtchn_bind_interdom_next_vcpu() since that's
>> the only place referencing it.
>
> Sure, I will do it.
>
>>
>> Why is it a percpu variable BTW? Wouldn't making it global result in
>> better interrupt distribution?
>
> The reason for percpu instead of global was to avoid locking. We can
> have a global variable (last_cpu) without locking, but the value of
> last_cpu won't be consistent without locks. Moreover, since
> irq_affinity is also used in the calculation of the cpu to bind, having a
> percpu or a global wouldn't really matter, as the result (selected_cpu)
> is more likely to be random (because different irqs can have different
> affinity). What do you guys suggest?

Doesn't initial affinity (which is what we expect here since irqbalance
has not run yet) typically cover all guest VCPUs?

>
>>
>>> +
>>>   struct per_user_data {
>>>       struct mutex bind_mutex; /* serialize bind/unbind operations */
>>>       struct rb_root evtchns;
>>> @@ -421,6 +423,36 @@ static void evtchn_unbind_from_user(struct
>>> per_user_data *u,
>>>       del_evtchn(u, evtchn);
>>>   }
>>>   +static void evtchn_bind_interdom_next_vcpu(int evtchn)
>>> +{
>>> +    unsigned int selected_cpu, irq;
>>> +    struct irq_desc *desc = NULL;
>>> +    unsigned long flags;
>>> +
>>> +    irq = irq_from_evtchn(evtchn);
>>> +    desc = irq_to_desc(irq);
>>> +
>>> +    if (!desc)
>>> +        return;
>>> +
>>> +    raw_spin_lock_irqsave(&desc->lock, flags);
>>> +    selected_cpu = this_cpu_read(bind_last_selected_cpu);
>>> +    selected_cpu = cpumask_next_and(selected_cpu,
>>> +            desc->irq_common_data.affinity, cpu_online_mask);
>>> +
>>> +    if (unlikely(selected_cpu >= nr_cpu_ids))
>>> +        selected_cpu =
>>> cpumask_first_and(desc->irq_common_data.affinity,
>>> +                cpu_online_mask);
>>> +
>>> +    raw_spin_unlock_irqrestore(&desc->lock, flags);
>> I think if you follow Juergen's suggestion of wrapping everything into
>> irq_enable/disable you can drop the lock altogether (assuming you keep
>> bind_last_selected_cpu percpu).
>>
>> -boris
>>
>
> I think we would still require spin_lock(). spin_lock is for irq_desc.

If you are trying to protect affinity then it may well change after you
drop the lock.

In fact, don't you have a race here? If we offline a VCPU we will (by
way of cpu_disable_common()->fixup_irqs()) update affinity to reflect
that a CPU is gone and there is a chance that xen_rebind_evtchn_to_cpu()
will happen after that.

So, contrary to what I said earlier ;-) not only do you need the lock,
but you should hold it across xen_rebind_evtchn_to_cpu() call. Does this
make sense?
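
(An illustrative interleaving of the race described above; the timing and
CPU numbering are hypothetical, not taken from the thread:)

/*
 * binding path                          CPU offlining a VCPU
 * ------------                          --------------------
 * raw_spin_lock_irqsave(&desc->lock)
 * reads affinity, picks selected_cpu
 * raw_spin_unlock_irqrestore(...)
 *                                       cpu_disable_common()
 *                                         -> fixup_irqs(): affinity
 *                                            updated, dead CPU removed
 * xen_rebind_evtchn_to_cpu(evtchn,
 *                          selected_cpu)  <- may target the dead CPU
 */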

-boris


>
>>> +    this_cpu_write(bind_last_selected_cpu, selected_cpu);
>>> +
>>> +    local_irq_disable();
>>> +    /* unmask expects irqs to be disabled */
>>> +    xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
>>> +    local_irq_enable();
>>> +}
>>> +
>>>
>


* Re: [Xen-devel] [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Anoob Soman @ 2017-06-05 14:49 UTC (permalink / raw)
  To: Boris Ostrovsky, xen-devel, linux-kernel; +Cc: jgross

On 05/06/17 15:10, Boris Ostrovsky wrote:
>> The reason for percpu instead of global was to avoid locking. We can
>> have a global variable (last_cpu) without locking, but the value of
>> last_cpu won't be consistent without locks. Moreover, since
>> irq_affinity is also used in the calculation of the cpu to bind, having a
>> percpu or a global wouldn't really matter, as the result (selected_cpu)
>> is more likely to be random (because different irqs can have different
>> affinity). What do you guys suggest?
> Doesn't initial affinity (which is what we expect here since irqbalance
> has not run yet) typically cover all guest VCPUs?

Yes, initial affinity covers all online VCPUs. But there is a small
chance that initial affinity might change before
evtchn_bind_interdom_next_vcpu is called. For example, I could run a
script to change irq affinity just when the irq sysfs entry appears. This
is the reason I thought it would be sensible (based on your suggestion)
to include irq_affinity in calculating the next VCPU. If you think
changing irq_affinity between request_irq() and
evtchn_bind_interdom_next_vcpu is virtually impossible, then we can drop
affinity and just use cpu_online_mask.

>>
>> I think we would still require the spin_lock(); the spin_lock protects irq_desc.
> If you are trying to protect affinity then it may well change after you
> drop the lock.
>
> In fact, don't you have a race here? If we offline a VCPU we will (by
> way of cpu_disable_common()->fixup_irqs()) update affinity to reflect
> that a CPU is gone and there is a chance that xen_rebind_evtchn_to_cpu()
> will happen after that.
>
> So, contrary to what I said earlier ;-) not only do you need the lock,
> but you should hold it across xen_rebind_evtchn_to_cpu() call. Does this
> make sense?

Yes, you are correct. .irq_set_affinity pretty much does the same thing.

The code will now look like this:
raw_spin_lock_irqsave(lock, flags);
percpu read
select_cpu
percpu write
xen_rebind_evtchn_to_cpu(evtchn, selected_cpu)
raw_spin_unlock_irqrestore(lock, flags);
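
(Filled out, a minimal sketch of the whole function with the lock held
across the rebind, assuming bind_last_selected_cpu stays a file-scope
percpu variable as in the posted patch; an illustration of the plan
above, not the actual V2:)

static void evtchn_bind_interdom_next_vcpu(int evtchn)
{
	unsigned int selected_cpu, irq;
	struct irq_desc *desc;
	unsigned long flags;

	irq = irq_from_evtchn(evtchn);
	desc = irq_to_desc(irq);

	if (!desc)
		return;

	/*
	 * Hold desc->lock, with irqs off, across the rebind so the
	 * affinity mask cannot change under us (e.g. via
	 * cpu_disable_common()->fixup_irqs() on VCPU offline).
	 */
	raw_spin_lock_irqsave(&desc->lock, flags);
	selected_cpu = this_cpu_read(bind_last_selected_cpu);
	selected_cpu = cpumask_next_and(selected_cpu,
			desc->irq_common_data.affinity, cpu_online_mask);

	if (unlikely(selected_cpu >= nr_cpu_ids))
		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
				cpu_online_mask);

	this_cpu_write(bind_last_selected_cpu, selected_cpu);

	/* unmask expects irqs to be disabled */
	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
	raw_spin_unlock_irqrestore(&desc->lock, flags);
}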


* Re: [Xen-devel] [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Boris Ostrovsky @ 2017-06-05 15:32 UTC (permalink / raw)
  To: Anoob Soman, xen-devel, linux-kernel; +Cc: jgross

On 06/05/2017 10:49 AM, Anoob Soman wrote:
> On 05/06/17 15:10, Boris Ostrovsky wrote:
>>> The reason for percpu instead of global was to avoid locking. We can
>>> have a global variable (last_cpu) without locking, but the value of
>>> last_cpu won't be consistent without locks. Moreover, since
>>> irq_affinity is also used in the calculation of the cpu to bind, having a
>>> percpu or a global wouldn't really matter, as the result (selected_cpu)
>>> is more likely to be random (because different irqs can have different
>>> affinity). What do you guys suggest?
>> Doesn't initial affinity (which is what we expect here since irqbalance
>> has not run yet) typically cover all guest VCPUs?
>
> Yes, initial affinity covers all online VCPUs. But there is a small
> chance that initial affinity might change before
> evtchn_bind_interdom_next_vcpu is called. For example, I could run a
> script to change irq affinity just when the irq sysfs entry appears. This
> is the reason I thought it would be sensible (based on your
> suggestion) to include irq_affinity in calculating the next VCPU. If you
> think changing irq_affinity between request_irq() and
> evtchn_bind_interdom_next_vcpu is virtually impossible, then we can
> drop affinity and just use cpu_online_mask.

I believe we do need to take affinity into consideration even if the
chance that it is non-default is small.

I am not opposed to having bind_last_selected_cpu percpu; I just wanted
to understand the reason better. Additional locking would be a downside
with a global, so if you feel that percpu is worth it then I won't object.

>
>>>
>>> I think we would still require the spin_lock(); the spin_lock protects irq_desc.
>> If you are trying to protect affinity then it may well change after you
>> drop the lock.
>>
>> In fact, don't you have a race here? If we offline a VCPU we will (by
>> way of cpu_disable_common()->fixup_irqs()) update affinity to reflect
>> that a CPU is gone and there is a chance that xen_rebind_evtchn_to_cpu()
>> will happen after that.
>>
>> So, contrary to what I said earlier ;-) not only do you need the lock,
>> but you should hold it across xen_rebind_evtchn_to_cpu() call. Does this
>> make sense?
>
> Yes, you are correct. .irq_set_affinity pretty much does the same thing.
>
> The code will now look like this:
> raw_spin_lock_irqsave(lock, flags);
> percpu read
> select_cpu
> percpu write
> xen_rebind_evtchn_to_cpu(evtchn, selected_cpu)
> raw_spin_unlock_irqrestore(lock, flags);

(BTW, I just noticed --- you don't need to initialize desc)

-boris


* Re: [Xen-devel] [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Anoob Soman @ 2017-06-05 16:14 UTC (permalink / raw)
  To: Boris Ostrovsky, xen-devel, linux-kernel; +Cc: jgross

On 05/06/17 16:32, Boris Ostrovsky wrote:
> I believe we do need to take affinity into consideration even if the
> chance that it is non-default is small.

Agreed.

> I am not opposed to having bind_last_selected_cpu percpu; I just wanted
> to understand the reason better. Additional locking would be a downside
> with a global, so if you feel that percpu is worth it then I won't object.

If affinity == cpu_online_mask, then percpu will give a better spread.
Atomic set/get can be used if we want to use a global variable, but I
think it will be more random than percpu (see the sketch below).
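
(For comparison, a hypothetical sketch of the global variant with atomic
set/get; last_selected_cpu is an illustrative name, and this version was
not actually proposed in the thread:)

static atomic_t last_selected_cpu = ATOMIC_INIT(0);

static void evtchn_bind_interdom_next_vcpu(int evtchn)
{
	unsigned int selected_cpu, irq;
	struct irq_desc *desc;
	unsigned long flags;

	irq = irq_from_evtchn(evtchn);
	desc = irq_to_desc(irq);

	if (!desc)
		return;

	raw_spin_lock_irqsave(&desc->lock, flags);
	/*
	 * The read/modify/write below is not atomic as a whole:
	 * binds for different irqs hold different desc->lock's, so
	 * they can read the same value and pick the same CPU. The
	 * spread ends up no more even than with a percpu variable.
	 */
	selected_cpu = atomic_read(&last_selected_cpu);
	selected_cpu = cpumask_next_and(selected_cpu,
			desc->irq_common_data.affinity, cpu_online_mask);
	if (unlikely(selected_cpu >= nr_cpu_ids))
		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
				cpu_online_mask);
	atomic_set(&last_selected_cpu, selected_cpu);

	/* unmask expects irqs to be disabled */
	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
	raw_spin_unlock_irqrestore(&desc->lock, flags);
}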

>
>> Yes, you are correct. .irq_set_affinity pretty much does the same thing.
>>
>> The code will now look like this:
>> raw_spin_lock_irqsave(lock, flags);
>> percpu read
>> select_cpu
>> percpu write
>> xen_rebind_evtchn_to_cpu(evtchn, selected_cpu)
>> raw_spin_unlock_irqrestore(lock, flags);
> (BTW, I just noticed --- you don't need to initialize desc)

Sorry, I didn't get it. Which desc doesn't need init?

-Anoob


* Re: [Xen-devel] [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Boris Ostrovsky @ 2017-06-05 16:46 UTC (permalink / raw)
  To: Anoob Soman, xen-devel, linux-kernel; +Cc: jgross

 
>> (BTW, I just noticed --- you don't need to initialize desc)
>
> Sorry, I didn't get it. Which desc doesn't need init?

+static void evtchn_bind_interdom_next_vcpu(int evtchn)
+{
+	unsigned int selected_cpu, irq;
+	struct irq_desc *desc = NULL;  <====
+	unsigned long flags;
+
+	irq = irq_from_evtchn(evtchn);
+	desc = irq_to_desc(irq);



-boris


* Re: [Xen-devel] [PATCH] xen-evtchn: Bind dyn evtchn:qemu-dm interrupt to next online VCPU
From: Anoob Soman @ 2017-06-05 17:13 UTC (permalink / raw)
  To: Boris Ostrovsky, xen-devel, linux-kernel; +Cc: jgross

On 05/06/17 17:46, Boris Ostrovsky wrote:
>
> +static void evtchn_bind_interdom_next_vcpu(int evtchn)
> +{
> +	unsigned int selected_cpu, irq;
> +	struct irq_desc *desc = NULL;  <====
>

Oh, thanks. I will send out a V2 with the modifications.

-Anoob.

