All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
@ 2015-06-19 15:15 Ross Lagerwall
  2015-06-19 16:02 ` Boris Ostrovsky
  2015-12-02 13:53 ` David Vrabel
  0 siblings, 2 replies; 9+ messages in thread
From: Ross Lagerwall @ 2015-06-19 15:15 UTC (permalink / raw)
  To: xen-devel; +Cc: Ross Lagerwall, Boris Ostrovsky, David Vrabel

When a CPU is offlined, there may be unprocessed events on a port for
that CPU.  If the port is subsequently reused on a different CPU, it
could be in an unexpected state with the link bit set, resulting in
interrupts being missed. Fix this by consuming any unprocessed events
for a particular CPU when that CPU dies.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
---
 drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 417415d..1dd0ba12 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
 
 static void consume_one_event(unsigned cpu,
 			      struct evtchn_fifo_control_block *control_block,
-			      unsigned priority, unsigned long *ready)
+			      unsigned priority, unsigned long *ready,
+			      bool drop)
 {
 	struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
 	uint32_t head;
@@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
 	if (head == 0)
 		clear_bit(priority, ready);
 
-	if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
-		handle_irq_for_port(port);
+	if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
+		if (unlikely(drop))
+			pr_warn("Dropping pending event for port %u\n", port);
+		else
+			handle_irq_for_port(port);
+	}
 
 	q->head[priority] = head;
 }
 
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
 {
 	struct evtchn_fifo_control_block *control_block;
 	unsigned long ready;
@@ -331,11 +336,16 @@ static void evtchn_fifo_handle_events(unsigned cpu)
 
 	while (ready) {
 		q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
-		consume_one_event(cpu, control_block, q, &ready);
+		consume_one_event(cpu, control_block, q, &ready, drop);
 		ready |= xchg(&control_block->ready, 0);
 	}
 }
 
+static void evtchn_fifo_handle_events(unsigned cpu)
+{
+	__evtchn_fifo_handle_events(cpu, false);
+}
+
 static void evtchn_fifo_resume(void)
 {
 	unsigned cpu;
@@ -420,6 +430,9 @@ static int evtchn_fifo_cpu_notification(struct notifier_block *self,
 		if (!per_cpu(cpu_control_block, cpu))
 			ret = evtchn_fifo_alloc_control_block(cpu);
 		break;
+	case CPU_DEAD:
+		__evtchn_fifo_handle_events(cpu, true);
+		break;
 	default:
 		break;
 	}
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-19 15:15 [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies Ross Lagerwall
@ 2015-06-19 16:02 ` Boris Ostrovsky
  2015-06-19 16:06   ` David Vrabel
  2015-12-02 13:53 ` David Vrabel
  1 sibling, 1 reply; 9+ messages in thread
From: Boris Ostrovsky @ 2015-06-19 16:02 UTC (permalink / raw)
  To: Ross Lagerwall, xen-devel; +Cc: David Vrabel

On 06/19/2015 11:15 AM, Ross Lagerwall wrote:
> When a CPU is offlined, there may be unprocessed events on a port for
> that CPU.  If the port is subsequently reused on a different CPU, it
> could be in an unexpected state with the link bit set, resulting in
> interrupts being missed. Fix this by consuming any unprocessed events
> for a particular CPU when that CPU dies.
>
> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
> ---
>   drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
>   1 file changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
> index 417415d..1dd0ba12 100644
> --- a/drivers/xen/events/events_fifo.c
> +++ b/drivers/xen/events/events_fifo.c
> @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
>   
>   static void consume_one_event(unsigned cpu,
>   			      struct evtchn_fifo_control_block *control_block,
> -			      unsigned priority, unsigned long *ready)
> +			      unsigned priority, unsigned long *ready,
> +			      bool drop)
>   {
>   	struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
>   	uint32_t head;
> @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
>   	if (head == 0)
>   		clear_bit(priority, ready);
>   
> -	if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
> -		handle_irq_for_port(port);
> +	if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
> +		if (unlikely(drop))
> +			pr_warn("Dropping pending event for port %u\n", port);

Maybe pr_info (or pr_notice)?

Also, why not do this (testing for unprocessed events) in 
xen_evtchn_close()?

-boris

> +		else
> +			handle_irq_for_port(port);
> +	}
>   
>   	q->head[priority] = head;
>   }
>   
> -static void evtchn_fifo_handle_events(unsigned cpu)
> +static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
>   {
>   	struct evtchn_fifo_control_block *control_block;
>   	unsigned long ready;
> @@ -331,11 +336,16 @@ static void evtchn_fifo_handle_events(unsigned cpu)
>   
>   	while (ready) {
>   		q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
> -		consume_one_event(cpu, control_block, q, &ready);
> +		consume_one_event(cpu, control_block, q, &ready, drop);
>   		ready |= xchg(&control_block->ready, 0);
>   	}
>   }
>   
> +static void evtchn_fifo_handle_events(unsigned cpu)
> +{
> +	__evtchn_fifo_handle_events(cpu, false);
> +}
> +
>   static void evtchn_fifo_resume(void)
>   {
>   	unsigned cpu;
> @@ -420,6 +430,9 @@ static int evtchn_fifo_cpu_notification(struct notifier_block *self,
>   		if (!per_cpu(cpu_control_block, cpu))
>   			ret = evtchn_fifo_alloc_control_block(cpu);
>   		break;
> +	case CPU_DEAD:
> +		__evtchn_fifo_handle_events(cpu, true);
> +		break;
>   	default:
>   		break;
>   	}

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-19 16:02 ` Boris Ostrovsky
@ 2015-06-19 16:06   ` David Vrabel
  2015-06-29 10:19     ` Ross Lagerwall
  0 siblings, 1 reply; 9+ messages in thread
From: David Vrabel @ 2015-06-19 16:06 UTC (permalink / raw)
  To: Boris Ostrovsky, Ross Lagerwall, xen-devel

On 19/06/15 17:02, Boris Ostrovsky wrote:
> On 06/19/2015 11:15 AM, Ross Lagerwall wrote:
>> When a CPU is offlined, there may be unprocessed events on a port for
>> that CPU.  If the port is subsequently reused on a different CPU, it
>> could be in an unexpected state with the link bit set, resulting in
>> interrupts being missed. Fix this by consuming any unprocessed events
>> for a particular CPU when that CPU dies.
>>
>> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
>> ---
>>   drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
>>   1 file changed, 18 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/xen/events/events_fifo.c
>> b/drivers/xen/events/events_fifo.c
>> index 417415d..1dd0ba12 100644
>> --- a/drivers/xen/events/events_fifo.c
>> +++ b/drivers/xen/events/events_fifo.c
>> @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
>>     static void consume_one_event(unsigned cpu,
>>                     struct evtchn_fifo_control_block *control_block,
>> -                  unsigned priority, unsigned long *ready)
>> +                  unsigned priority, unsigned long *ready,
>> +                  bool drop)
>>   {
>>       struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
>>       uint32_t head;
>> @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
>>       if (head == 0)
>>           clear_bit(priority, ready);
>>   -    if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
>> -        handle_irq_for_port(port);
>> +    if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
>> +        if (unlikely(drop))
>> +            pr_warn("Dropping pending event for port %u\n", port);
> 
> Maybe pr_info (or pr_notice)?

We want a warning here because we think this shouldn't happen -- if it
does we actually need to retrigger the event on its new CPU.

> Also, why not do this (testing for unprocessed events) in
> xen_evtchn_close()?

We can't do anything about them when closing because they may be in the
middle of a queue.

David

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-19 16:06   ` David Vrabel
@ 2015-06-29 10:19     ` Ross Lagerwall
  2015-06-29 13:32       ` Boris Ostrovsky
  0 siblings, 1 reply; 9+ messages in thread
From: Ross Lagerwall @ 2015-06-29 10:19 UTC (permalink / raw)
  To: David Vrabel, Boris Ostrovsky, xen-devel

On 06/19/2015 05:06 PM, David Vrabel wrote:
> On 19/06/15 17:02, Boris Ostrovsky wrote:
>> On 06/19/2015 11:15 AM, Ross Lagerwall wrote:
>>> When a CPU is offlined, there may be unprocessed events on a port for
>>> that CPU.  If the port is subsequently reused on a different CPU, it
>>> could be in an unexpected state with the link bit set, resulting in
>>> interrupts being missed. Fix this by consuming any unprocessed events
>>> for a particular CPU when that CPU dies.
>>>
>>> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
>>> ---
>>>    drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
>>>    1 file changed, 18 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/xen/events/events_fifo.c
>>> b/drivers/xen/events/events_fifo.c
>>> index 417415d..1dd0ba12 100644
>>> --- a/drivers/xen/events/events_fifo.c
>>> +++ b/drivers/xen/events/events_fifo.c
>>> @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
>>>      static void consume_one_event(unsigned cpu,
>>>                      struct evtchn_fifo_control_block *control_block,
>>> -                  unsigned priority, unsigned long *ready)
>>> +                  unsigned priority, unsigned long *ready,
>>> +                  bool drop)
>>>    {
>>>        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
>>>        uint32_t head;
>>> @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
>>>        if (head == 0)
>>>            clear_bit(priority, ready);
>>>    -    if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
>>> -        handle_irq_for_port(port);
>>> +    if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
>>> +        if (unlikely(drop))
>>> +            pr_warn("Dropping pending event for port %u\n", port);
>>
>> Maybe pr_info (or pr_notice)?
>
> We want a warning here because we think this shouldn't happen -- if it
> does we actually need to retrigger the event on its new CPU.
>
>> Also, why not do this (testing for unprocessed events) in
>> xen_evtchn_close()?
>
> We can't do anything about them when closing because they may be in the
> middle of a queue.
>
> David
>

Ping. Is this change OK?

-- 
Ross Lagerwall

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-29 10:19     ` Ross Lagerwall
@ 2015-06-29 13:32       ` Boris Ostrovsky
  2015-06-30  9:51         ` Ross Lagerwall
  0 siblings, 1 reply; 9+ messages in thread
From: Boris Ostrovsky @ 2015-06-29 13:32 UTC (permalink / raw)
  To: Ross Lagerwall, David Vrabel, xen-devel

On 06/29/2015 06:19 AM, Ross Lagerwall wrote:
> On 06/19/2015 05:06 PM, David Vrabel wrote:
>> On 19/06/15 17:02, Boris Ostrovsky wrote:
>>> On 06/19/2015 11:15 AM, Ross Lagerwall wrote:
>>>> When a CPU is offlined, there may be unprocessed events on a port for
>>>> that CPU.  If the port is subsequently reused on a different CPU, it
>>>> could be in an unexpected state with the link bit set, resulting in
>>>> interrupts being missed. Fix this by consuming any unprocessed events
>>>> for a particular CPU when that CPU dies.
>>>>
>>>> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
>>>> ---
>>>>    drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
>>>>    1 file changed, 18 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/xen/events/events_fifo.c
>>>> b/drivers/xen/events/events_fifo.c
>>>> index 417415d..1dd0ba12 100644
>>>> --- a/drivers/xen/events/events_fifo.c
>>>> +++ b/drivers/xen/events/events_fifo.c
>>>> @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
>>>>      static void consume_one_event(unsigned cpu,
>>>>                      struct evtchn_fifo_control_block *control_block,
>>>> -                  unsigned priority, unsigned long *ready)
>>>> +                  unsigned priority, unsigned long *ready,
>>>> +                  bool drop)
>>>>    {
>>>>        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
>>>>        uint32_t head;
>>>> @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
>>>>        if (head == 0)
>>>>            clear_bit(priority, ready);
>>>>    -    if (evtchn_fifo_is_pending(port) && 
>>>> !evtchn_fifo_is_masked(port))
>>>> -        handle_irq_for_port(port);
>>>> +    if (evtchn_fifo_is_pending(port) && 
>>>> !evtchn_fifo_is_masked(port)) {
>>>> +        if (unlikely(drop))
>>>> +            pr_warn("Dropping pending event for port %u\n", port);
>>>
>>> Maybe pr_info (or pr_notice)?
>>
>> We want a warning here because we think this shouldn't happen -- if it
>> does we actually need to retrigger the event on its new CPU.
>>
>>> Also, why not do this (testing for unprocessed events) in
>>> xen_evtchn_close()?
>>
>> We can't do anything about them when closing because they may be in the
>> middle of a queue.

(Sorry, I missed this)

Why can't (actually, why doesn't) the cpu that is being offlined drain 
its queue?

-boris

>>
>> David
>>
>
> Ping. Is this change OK?
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-29 13:32       ` Boris Ostrovsky
@ 2015-06-30  9:51         ` Ross Lagerwall
  2015-06-30 12:26           ` Boris Ostrovsky
  0 siblings, 1 reply; 9+ messages in thread
From: Ross Lagerwall @ 2015-06-30  9:51 UTC (permalink / raw)
  To: Boris Ostrovsky, David Vrabel, xen-devel

On 06/29/2015 02:32 PM, Boris Ostrovsky wrote:
> On 06/29/2015 06:19 AM, Ross Lagerwall wrote:
>> On 06/19/2015 05:06 PM, David Vrabel wrote:
>>> On 19/06/15 17:02, Boris Ostrovsky wrote:
>>>> On 06/19/2015 11:15 AM, Ross Lagerwall wrote:
>>>>> When a CPU is offlined, there may be unprocessed events on a port for
>>>>> that CPU.  If the port is subsequently reused on a different CPU, it
>>>>> could be in an unexpected state with the link bit set, resulting in
>>>>> interrupts being missed. Fix this by consuming any unprocessed events
>>>>> for a particular CPU when that CPU dies.
>>>>>
>>>>> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
>>>>> ---
>>>>>    drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
>>>>>    1 file changed, 18 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/xen/events/events_fifo.c
>>>>> b/drivers/xen/events/events_fifo.c
>>>>> index 417415d..1dd0ba12 100644
>>>>> --- a/drivers/xen/events/events_fifo.c
>>>>> +++ b/drivers/xen/events/events_fifo.c
>>>>> @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
>>>>>      static void consume_one_event(unsigned cpu,
>>>>>                      struct evtchn_fifo_control_block *control_block,
>>>>> -                  unsigned priority, unsigned long *ready)
>>>>> +                  unsigned priority, unsigned long *ready,
>>>>> +                  bool drop)
>>>>>    {
>>>>>        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
>>>>>        uint32_t head;
>>>>> @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
>>>>>        if (head == 0)
>>>>>            clear_bit(priority, ready);
>>>>>    -    if (evtchn_fifo_is_pending(port) &&
>>>>> !evtchn_fifo_is_masked(port))
>>>>> -        handle_irq_for_port(port);
>>>>> +    if (evtchn_fifo_is_pending(port) &&
>>>>> !evtchn_fifo_is_masked(port)) {
>>>>> +        if (unlikely(drop))
>>>>> +            pr_warn("Dropping pending event for port %u\n", port);
>>>>
>>>> Maybe pr_info (or pr_notice)?
>>>
>>> We want a warning here because we think this shouldn't happen -- if it
>>> does we actually need to retrigger the event on its new CPU.
>>>
>>>> Also, why not do this (testing for unprocessed events) in
>>>> xen_evtchn_close()?
>>>
>>> We can't do anything about them when closing because they may be in the
>>> middle of a queue.
>
> (Sorry, I missed this)
>
> Why can't (actually, why doesn't) the cpu that is being offlined drain
> its queue?
>

Where would this be done? I thought using CPU notifiers was the correct 
way to hook when a CPU goes down without having to stick fifo event 
channel code in the core Xen code.

-- 
Ross Lagerwall

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-30  9:51         ` Ross Lagerwall
@ 2015-06-30 12:26           ` Boris Ostrovsky
  2015-12-02 13:22             ` David Vrabel
  0 siblings, 1 reply; 9+ messages in thread
From: Boris Ostrovsky @ 2015-06-30 12:26 UTC (permalink / raw)
  To: Ross Lagerwall, David Vrabel, xen-devel

On 06/30/2015 05:51 AM, Ross Lagerwall wrote:
> On 06/29/2015 02:32 PM, Boris Ostrovsky wrote:
>> On 06/29/2015 06:19 AM, Ross Lagerwall wrote:
>>> On 06/19/2015 05:06 PM, David Vrabel wrote:
>>>> On 19/06/15 17:02, Boris Ostrovsky wrote:
>>>>> On 06/19/2015 11:15 AM, Ross Lagerwall wrote:
>>>>>> When a CPU is offlined, there may be unprocessed events on a port 
>>>>>> for
>>>>>> that CPU.  If the port is subsequently reused on a different CPU, it
>>>>>> could be in an unexpected state with the link bit set, resulting in
>>>>>> interrupts being missed. Fix this by consuming any unprocessed 
>>>>>> events
>>>>>> for a particular CPU when that CPU dies.
>>>>>>
>>>>>> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
>>>>>> ---
>>>>>>    drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
>>>>>>    1 file changed, 18 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/xen/events/events_fifo.c
>>>>>> b/drivers/xen/events/events_fifo.c
>>>>>> index 417415d..1dd0ba12 100644
>>>>>> --- a/drivers/xen/events/events_fifo.c
>>>>>> +++ b/drivers/xen/events/events_fifo.c
>>>>>> @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
>>>>>>      static void consume_one_event(unsigned cpu,
>>>>>>                      struct evtchn_fifo_control_block 
>>>>>> *control_block,
>>>>>> -                  unsigned priority, unsigned long *ready)
>>>>>> +                  unsigned priority, unsigned long *ready,
>>>>>> +                  bool drop)
>>>>>>    {
>>>>>>        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
>>>>>>        uint32_t head;
>>>>>> @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
>>>>>>        if (head == 0)
>>>>>>            clear_bit(priority, ready);
>>>>>>    -    if (evtchn_fifo_is_pending(port) &&
>>>>>> !evtchn_fifo_is_masked(port))
>>>>>> -        handle_irq_for_port(port);
>>>>>> +    if (evtchn_fifo_is_pending(port) &&
>>>>>> !evtchn_fifo_is_masked(port)) {
>>>>>> +        if (unlikely(drop))
>>>>>> +            pr_warn("Dropping pending event for port %u\n", port);
>>>>>
>>>>> Maybe pr_info (or pr_notice)?
>>>>
>>>> We want a warning here because we think this shouldn't happen -- if it
>>>> does we actually need to retrigger the event on its new CPU.
>>>>
>>>>> Also, why not do this (testing for unprocessed events) in
>>>>> xen_evtchn_close()?
>>>>
>>>> We can't do anything about them when closing because they may be in 
>>>> the
>>>> middle of a queue.
>>
>> (Sorry, I missed this)
>>
>> Why can't (actually, why doesn't) the cpu that is being offlined drain
>> its queue?
>>
>
> Where would this be done? I thought using CPU notifiers was the 
> correct way to hook when a CPU goes down without having to stick fifo 
> event channel code in the core Xen code.

In xen_evtchn_close(). We should be getting there (roughly) as cpu_die() 
-> xen_cpu_die() -> xen_smp_intr_free() -> unbind_from_irqhandler(). In 
fact, this path is taken right before cpu_down() sends CPU_DEAD 
notifications.

I think cleaning up in xen_evtchn_close() is better because it is
possible to close an event channel for reasons other than a CPU going away,
in which case we may also need to deal with unprocessed events.

(BTW, I noticed that you are cleaning up fifo events only. Do we need to 
do the same for 2-level?)

-boris

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-30 12:26           ` Boris Ostrovsky
@ 2015-12-02 13:22             ` David Vrabel
  0 siblings, 0 replies; 9+ messages in thread
From: David Vrabel @ 2015-12-02 13:22 UTC (permalink / raw)
  To: Boris Ostrovsky, Ross Lagerwall, David Vrabel, xen-devel

On 30/06/15 13:26, Boris Ostrovsky wrote:
> On 06/30/2015 05:51 AM, Ross Lagerwall wrote:
>> On 06/29/2015 02:32 PM, Boris Ostrovsky wrote:
>>> On 06/29/2015 06:19 AM, Ross Lagerwall wrote:
>>>> On 06/19/2015 05:06 PM, David Vrabel wrote:
>>>>> On 19/06/15 17:02, Boris Ostrovsky wrote:
>>>>>> On 06/19/2015 11:15 AM, Ross Lagerwall wrote:
>>>>>>> When a CPU is offlined, there may be unprocessed events on a port
>>>>>>> for
>>>>>>> that CPU.  If the port is subsequently reused on a different CPU, it
>>>>>>> could be in an unexpected state with the link bit set, resulting in
>>>>>>> interrupts being missed. Fix this by consuming any unprocessed
>>>>>>> events
>>>>>>> for a particular CPU when that CPU dies.
>>>>>>>
>>>>>>> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
>>>>>>> ---
>>>>>>>    drivers/xen/events/events_fifo.c | 23 ++++++++++++++++++-----
>>>>>>>    1 file changed, 18 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/xen/events/events_fifo.c
>>>>>>> b/drivers/xen/events/events_fifo.c
>>>>>>> index 417415d..1dd0ba12 100644
>>>>>>> --- a/drivers/xen/events/events_fifo.c
>>>>>>> +++ b/drivers/xen/events/events_fifo.c
>>>>>>> @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
>>>>>>>      static void consume_one_event(unsigned cpu,
>>>>>>>                      struct evtchn_fifo_control_block
>>>>>>> *control_block,
>>>>>>> -                  unsigned priority, unsigned long *ready)
>>>>>>> +                  unsigned priority, unsigned long *ready,
>>>>>>> +                  bool drop)
>>>>>>>    {
>>>>>>>        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
>>>>>>>        uint32_t head;
>>>>>>> @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
>>>>>>>        if (head == 0)
>>>>>>>            clear_bit(priority, ready);
>>>>>>>    -    if (evtchn_fifo_is_pending(port) &&
>>>>>>> !evtchn_fifo_is_masked(port))
>>>>>>> -        handle_irq_for_port(port);
>>>>>>> +    if (evtchn_fifo_is_pending(port) &&
>>>>>>> !evtchn_fifo_is_masked(port)) {
>>>>>>> +        if (unlikely(drop))
>>>>>>> +            pr_warn("Dropping pending event for port %u\n", port);
>>>>>>
>>>>>> Maybe pr_info (or pr_notice)?
>>>>>
>>>>> We want a warning here because we think this shouldn't happen -- if it
>>>>> does we actually need to retrigger the event on its new CPU.
>>>>>
>>>>>> Also, why not do this (testing for unprocessed events) in
>>>>>> xen_evtchn_close()?
>>>>>
>>>>> We can't do anything about them when closing because they may be in
>>>>> the
>>>>> middle of a queue.
>>>
>>> (Sorry, I missed this)
>>>
>>> Why can't (actually, why doesn't) the cpu that is being offlined drain
>>> its queue?
>>>
>>
>> Where would this be done? I thought using CPU notifiers was the
>> correct way to hook when a CPU goes down without having to stick fifo
>> event channel code in the core Xen code.
> 
> In xen_evtchn_close(). We should be getting there (roughly) as cpu_die()
> -> xen_cpu_die() -> xen_smp_intr_free() -> unbind_from_irqhandler(). In
> fact, this path is taken right before cpu_down() sends CPU_DEAD
> notifications.
> 
> I think cleaning up in xen_evtchn_close() is better because it is
> possible to close event channel for reasons other than CPU going away,
> in which case we also may need to deal with unprocessed events.

Having looked at this further and attempted to implement it, I found that
draining events in close is difficult because:

a) we can't wait for LINKED to clear when closing since we're holding
the desc spin lock; and deferring the close to a tasklet or work item
doesn't work because:

b) rebinding a PIRQ will fail if the close is deferred and not yet
completed and there is no way to ensure the close happens promptly
without changes to core irq code.

c) Xen will be fixed to not reuse ports that are still LINKED.

I'm going to apply Ross's original patch and Cc stable.

David

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies
  2015-06-19 15:15 [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies Ross Lagerwall
  2015-06-19 16:02 ` Boris Ostrovsky
@ 2015-12-02 13:53 ` David Vrabel
  1 sibling, 0 replies; 9+ messages in thread
From: David Vrabel @ 2015-12-02 13:53 UTC (permalink / raw)
  To: Ross Lagerwall, xen-devel; +Cc: Boris Ostrovsky, David Vrabel

On 19/06/15 16:15, Ross Lagerwall wrote:
> When a CPU is offlined, there may be unprocessed events on a port for
> that CPU.  If the port is subsequently reused on a different CPU, it
> could be in an unexpected state with the link bit set, resulting in
> interrupts being missed. Fix this by consuming any unprocessed events
> for a particular CPU when that CPU dies.

Applied to for-linus-4.4, thanks.

David

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-12-02 13:53 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-19 15:15 [PATCH] xen/events/fifo: Consume unprocessed events when a CPU dies Ross Lagerwall
2015-06-19 16:02 ` Boris Ostrovsky
2015-06-19 16:06   ` David Vrabel
2015-06-29 10:19     ` Ross Lagerwall
2015-06-29 13:32       ` Boris Ostrovsky
2015-06-30  9:51         ` Ross Lagerwall
2015-06-30 12:26           ` Boris Ostrovsky
2015-12-02 13:22             ` David Vrabel
2015-12-02 13:53 ` David Vrabel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.