All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] iavf: fix locking of critical sections
@ 2021-03-16 10:01 ` Stefan Assmann
  0 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-16 10:01 UTC (permalink / raw)
  To: intel-wired-lan
  Cc: netdev, anthony.l.nguyen, lihong.yang, jesse.brandeburg,
	slawomirx.laba, nicholas.d.nunley, sassmann

To avoid races between iavf_init_task(), iavf_reset_task(),
iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
remove functions more locking is required.
The current protection by __IAVF_IN_CRITICAL_TASK is needed in
additional places.

- The reset task performs state transitions, therefore needs locking.
- The adminq task acts on replies from the PF in
  iavf_virtchnl_completion() which may alter the states.
- The init task is not only run during probe but also if a VF gets stuck
  to reinitialize it.
- The shutdown function performs a state transition.
- The remove function perorms a state transition and also free's
  resources.

iavf_lock_timeout() is introduced to avoid waiting infinitely
and cause a deadlock. Rather unlock and print a warning.

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 57 ++++++++++++++++++---
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index dc5b3c06d1e0..538b7aa43fa5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -131,6 +131,30 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
 	return 0;
 }
 
+/**
+ * iavf_timeout - try to set bit but give up after timeout
+ * @adapter: board private structure
+ * @bit: bit to set
+ * @msecs: timeout in msecs
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static inline int iavf_lock_timeout(struct iavf_adapter *adapter,
+				    enum iavf_critical_section_t bit,
+				    unsigned int msecs)
+{
+	unsigned int wait, delay = 10;
+
+	for (wait = 0; wait < msecs; wait += delay) {
+		if (!test_and_set_bit(bit, &adapter->crit_section))
+			return 0;
+
+		msleep(delay);
+	}
+
+	return -1;
+}
+
 /**
  * iavf_schedule_reset - Set the flags and schedule a reset event
  * @adapter: board private structure
@@ -2069,6 +2093,10 @@ static void iavf_reset_task(struct work_struct *work)
 	if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
 		return;
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) {
+		schedule_work(&adapter->reset_task);
+		return;
+	}
 	while (test_and_set_bit(__IAVF_IN_CLIENT_TASK,
 				&adapter->crit_section))
 		usleep_range(500, 1000);
@@ -2275,6 +2303,8 @@ static void iavf_adminq_task(struct work_struct *work)
 	if (!event.msg_buf)
 		goto out;
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200))
+		goto freedom;
 	do {
 		ret = iavf_clean_arq_element(hw, &event, &pending);
 		v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
@@ -2288,6 +2318,7 @@ static void iavf_adminq_task(struct work_struct *work)
 		if (pending != 0)
 			memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
 	} while (pending);
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 
 	if ((adapter->flags &
 	     (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
@@ -3590,6 +3621,10 @@ static void iavf_init_task(struct work_struct *work)
 						    init_task.work);
 	struct iavf_hw *hw = &adapter->hw;
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) {
+		dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
+		return;
+	}
 	switch (adapter->state) {
 	case __IAVF_STARTUP:
 		if (iavf_startup(adapter) < 0)
@@ -3602,14 +3637,14 @@ static void iavf_init_task(struct work_struct *work)
 	case __IAVF_INIT_GET_RESOURCES:
 		if (iavf_init_get_resources(adapter) < 0)
 			goto init_failed;
-		return;
+		goto out;
 	default:
 		goto init_failed;
 	}
 
 	queue_delayed_work(iavf_wq, &adapter->init_task,
 			   msecs_to_jiffies(30));
-	return;
+	goto out;
 init_failed:
 	if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
 		dev_err(&adapter->pdev->dev,
@@ -3618,9 +3653,11 @@ static void iavf_init_task(struct work_struct *work)
 		iavf_shutdown_adminq(hw);
 		adapter->state = __IAVF_STARTUP;
 		queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
-		return;
+		goto out;
 	}
 	queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
+out:
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 }
 
 /**
@@ -3637,9 +3674,12 @@ static void iavf_shutdown(struct pci_dev *pdev)
 	if (netif_running(netdev))
 		iavf_close(netdev);
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
+		dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
 	/* Prevent the watchdog from running. */
 	adapter->state = __IAVF_REMOVE;
 	adapter->aq_required = 0;
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 
 #ifdef CONFIG_PM
 	pci_save_state(pdev);
@@ -3866,10 +3906,6 @@ static void iavf_remove(struct pci_dev *pdev)
 				 err);
 	}
 
-	/* Shut down all the garbage mashers on the detention level */
-	adapter->state = __IAVF_REMOVE;
-	adapter->aq_required = 0;
-	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
 	iavf_request_reset(adapter);
 	msleep(50);
 	/* If the FW isn't responding, kick it once, but only once. */
@@ -3877,6 +3913,13 @@ static void iavf_remove(struct pci_dev *pdev)
 		iavf_request_reset(adapter);
 		msleep(50);
 	}
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
+		dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
+
+	/* Shut down all the garbage mashers on the detention level */
+	adapter->state = __IAVF_REMOVE;
+	adapter->aq_required = 0;
+	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
 	iavf_free_all_tx_resources(adapter);
 	iavf_free_all_rx_resources(adapter);
 	iavf_misc_irq_disable(adapter);
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-16 10:01 ` Stefan Assmann
  0 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-16 10:01 UTC (permalink / raw)
  To: intel-wired-lan

To avoid races between iavf_init_task(), iavf_reset_task(),
iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
remove functions more locking is required.
The current protection by __IAVF_IN_CRITICAL_TASK is needed in
additional places.

- The reset task performs state transitions, therefore needs locking.
- The adminq task acts on replies from the PF in
  iavf_virtchnl_completion() which may alter the states.
- The init task is not only run during probe but also if a VF gets stuck
  to reinitialize it.
- The shutdown function performs a state transition.
- The remove function perorms a state transition and also free's
  resources.

iavf_lock_timeout() is introduced to avoid waiting infinitely
and cause a deadlock. Rather unlock and print a warning.

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 57 ++++++++++++++++++---
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index dc5b3c06d1e0..538b7aa43fa5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -131,6 +131,30 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
 	return 0;
 }
 
+/**
+ * iavf_timeout - try to set bit but give up after timeout
+ * @adapter: board private structure
+ * @bit: bit to set
+ * @msecs: timeout in msecs
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static inline int iavf_lock_timeout(struct iavf_adapter *adapter,
+				    enum iavf_critical_section_t bit,
+				    unsigned int msecs)
+{
+	unsigned int wait, delay = 10;
+
+	for (wait = 0; wait < msecs; wait += delay) {
+		if (!test_and_set_bit(bit, &adapter->crit_section))
+			return 0;
+
+		msleep(delay);
+	}
+
+	return -1;
+}
+
 /**
  * iavf_schedule_reset - Set the flags and schedule a reset event
  * @adapter: board private structure
@@ -2069,6 +2093,10 @@ static void iavf_reset_task(struct work_struct *work)
 	if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
 		return;
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) {
+		schedule_work(&adapter->reset_task);
+		return;
+	}
 	while (test_and_set_bit(__IAVF_IN_CLIENT_TASK,
 				&adapter->crit_section))
 		usleep_range(500, 1000);
@@ -2275,6 +2303,8 @@ static void iavf_adminq_task(struct work_struct *work)
 	if (!event.msg_buf)
 		goto out;
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200))
+		goto freedom;
 	do {
 		ret = iavf_clean_arq_element(hw, &event, &pending);
 		v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
@@ -2288,6 +2318,7 @@ static void iavf_adminq_task(struct work_struct *work)
 		if (pending != 0)
 			memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
 	} while (pending);
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 
 	if ((adapter->flags &
 	     (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
@@ -3590,6 +3621,10 @@ static void iavf_init_task(struct work_struct *work)
 						    init_task.work);
 	struct iavf_hw *hw = &adapter->hw;
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) {
+		dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
+		return;
+	}
 	switch (adapter->state) {
 	case __IAVF_STARTUP:
 		if (iavf_startup(adapter) < 0)
@@ -3602,14 +3637,14 @@ static void iavf_init_task(struct work_struct *work)
 	case __IAVF_INIT_GET_RESOURCES:
 		if (iavf_init_get_resources(adapter) < 0)
 			goto init_failed;
-		return;
+		goto out;
 	default:
 		goto init_failed;
 	}
 
 	queue_delayed_work(iavf_wq, &adapter->init_task,
 			   msecs_to_jiffies(30));
-	return;
+	goto out;
 init_failed:
 	if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
 		dev_err(&adapter->pdev->dev,
@@ -3618,9 +3653,11 @@ static void iavf_init_task(struct work_struct *work)
 		iavf_shutdown_adminq(hw);
 		adapter->state = __IAVF_STARTUP;
 		queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
-		return;
+		goto out;
 	}
 	queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
+out:
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 }
 
 /**
@@ -3637,9 +3674,12 @@ static void iavf_shutdown(struct pci_dev *pdev)
 	if (netif_running(netdev))
 		iavf_close(netdev);
 
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
+		dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
 	/* Prevent the watchdog from running. */
 	adapter->state = __IAVF_REMOVE;
 	adapter->aq_required = 0;
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 
 #ifdef CONFIG_PM
 	pci_save_state(pdev);
@@ -3866,10 +3906,6 @@ static void iavf_remove(struct pci_dev *pdev)
 				 err);
 	}
 
-	/* Shut down all the garbage mashers on the detention level */
-	adapter->state = __IAVF_REMOVE;
-	adapter->aq_required = 0;
-	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
 	iavf_request_reset(adapter);
 	msleep(50);
 	/* If the FW isn't responding, kick it once, but only once. */
@@ -3877,6 +3913,13 @@ static void iavf_remove(struct pci_dev *pdev)
 		iavf_request_reset(adapter);
 		msleep(50);
 	}
+	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
+		dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
+
+	/* Shut down all the garbage mashers on the detention level */
+	adapter->state = __IAVF_REMOVE;
+	adapter->aq_required = 0;
+	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
 	iavf_free_all_tx_resources(adapter);
 	iavf_free_all_rx_resources(adapter);
 	iavf_misc_irq_disable(adapter);
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] iavf: fix locking of critical sections
  2021-03-16 10:01 ` [Intel-wired-lan] " Stefan Assmann
@ 2021-03-16 17:14   ` Jakub Kicinski
  -1 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2021-03-16 17:14 UTC (permalink / raw)
  To: Stefan Assmann
  Cc: intel-wired-lan, netdev, anthony.l.nguyen, lihong.yang,
	jesse.brandeburg, slawomirx.laba, nicholas.d.nunley

On Tue, 16 Mar 2021 11:01:41 +0100 Stefan Assmann wrote:
> To avoid races between iavf_init_task(), iavf_reset_task(),
> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
> remove functions more locking is required.
> The current protection by __IAVF_IN_CRITICAL_TASK is needed in
> additional places.
> 
> - The reset task performs state transitions, therefore needs locking.
> - The adminq task acts on replies from the PF in
>   iavf_virtchnl_completion() which may alter the states.
> - The init task is not only run during probe but also if a VF gets stuck
>   to reinitialize it.
> - The shutdown function performs a state transition.
> - The remove function perorms a state transition and also free's
>   resources.
> 
> iavf_lock_timeout() is introduced to avoid waiting infinitely
> and cause a deadlock. Rather unlock and print a warning.
> 
> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>

I personally think that the overuse of flags in Intel drivers brings
nothing but trouble. At which point does it make sense to just add a
lock / semaphore here rather than open code all this with no clear
semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
all the uses look like poor man's locking at a quick grep. What am I
missing?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-16 17:14   ` Jakub Kicinski
  0 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2021-03-16 17:14 UTC (permalink / raw)
  To: intel-wired-lan

On Tue, 16 Mar 2021 11:01:41 +0100 Stefan Assmann wrote:
> To avoid races between iavf_init_task(), iavf_reset_task(),
> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
> remove functions more locking is required.
> The current protection by __IAVF_IN_CRITICAL_TASK is needed in
> additional places.
> 
> - The reset task performs state transitions, therefore needs locking.
> - The adminq task acts on replies from the PF in
>   iavf_virtchnl_completion() which may alter the states.
> - The init task is not only run during probe but also if a VF gets stuck
>   to reinitialize it.
> - The shutdown function performs a state transition.
> - The remove function perorms a state transition and also free's
>   resources.
> 
> iavf_lock_timeout() is introduced to avoid waiting infinitely
> and cause a deadlock. Rather unlock and print a warning.
> 
> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>

I personally think that the overuse of flags in Intel drivers brings
nothing but trouble. At which point does it make sense to just add a
lock / semaphore here rather than open code all this with no clear
semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
all the uses look like poor man's locking at a quick grep. What am I
missing?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] iavf: fix locking of critical sections
  2021-03-16 17:14   ` [Intel-wired-lan] " Jakub Kicinski
@ 2021-03-16 17:27     ` Stefan Assmann
  -1 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-16 17:27 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: intel-wired-lan, netdev, anthony.l.nguyen, lihong.yang,
	jesse.brandeburg, slawomirx.laba, nicholas.d.nunley

On 16.03.21 18:14, Jakub Kicinski wrote:
> On Tue, 16 Mar 2021 11:01:41 +0100 Stefan Assmann wrote:
>> To avoid races between iavf_init_task(), iavf_reset_task(),
>> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
>> remove functions more locking is required.
>> The current protection by __IAVF_IN_CRITICAL_TASK is needed in
>> additional places.
>>
>> - The reset task performs state transitions, therefore needs locking.
>> - The adminq task acts on replies from the PF in
>>   iavf_virtchnl_completion() which may alter the states.
>> - The init task is not only run during probe but also if a VF gets stuck
>>   to reinitialize it.
>> - The shutdown function performs a state transition.
>> - The remove function perorms a state transition and also free's
>>   resources.
>>
>> iavf_lock_timeout() is introduced to avoid waiting infinitely
>> and cause a deadlock. Rather unlock and print a warning.
>>
>> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
> 
> I personally think that the overuse of flags in Intel drivers brings
> nothing but trouble. At which point does it make sense to just add a
> lock / semaphore here rather than open code all this with no clear
> semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
> all the uses look like poor man's locking at a quick grep. What am I
> missing?
> 

Hi Jakub,

I agree with you that the locking could be done with other locking
mechanisms just as good. I didn't invent the current method so I'll let
Intel comment on that part, but I'd like to point out that what I'm
making use of is fixing what is currently in the driver.

Thanks!

  Stefan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-16 17:27     ` Stefan Assmann
  0 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-16 17:27 UTC (permalink / raw)
  To: intel-wired-lan

On 16.03.21 18:14, Jakub Kicinski wrote:
> On Tue, 16 Mar 2021 11:01:41 +0100 Stefan Assmann wrote:
>> To avoid races between iavf_init_task(), iavf_reset_task(),
>> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
>> remove functions more locking is required.
>> The current protection by __IAVF_IN_CRITICAL_TASK is needed in
>> additional places.
>>
>> - The reset task performs state transitions, therefore needs locking.
>> - The adminq task acts on replies from the PF in
>>   iavf_virtchnl_completion() which may alter the states.
>> - The init task is not only run during probe but also if a VF gets stuck
>>   to reinitialize it.
>> - The shutdown function performs a state transition.
>> - The remove function perorms a state transition and also free's
>>   resources.
>>
>> iavf_lock_timeout() is introduced to avoid waiting infinitely
>> and cause a deadlock. Rather unlock and print a warning.
>>
>> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
> 
> I personally think that the overuse of flags in Intel drivers brings
> nothing but trouble. At which point does it make sense to just add a
> lock / semaphore here rather than open code all this with no clear
> semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
> all the uses look like poor man's locking at a quick grep. What am I
> missing?
> 

Hi Jakub,

I agree with you that the locking could be done with other locking
mechanisms just as good. I didn't invent the current method so I'll let
Intel comment on that part, but I'd like to point out that what I'm
making use of is fixing what is currently in the driver.

Thanks!

  Stefan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] iavf: fix locking of critical sections
  2021-03-16 17:27     ` [Intel-wired-lan] " Stefan Assmann
@ 2021-03-16 20:29       ` Jakub Kicinski
  -1 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2021-03-16 20:29 UTC (permalink / raw)
  To: Stefan Assmann
  Cc: intel-wired-lan, netdev, anthony.l.nguyen, lihong.yang,
	jesse.brandeburg, slawomirx.laba, nicholas.d.nunley

On Tue, 16 Mar 2021 18:27:10 +0100 Stefan Assmann wrote:
> On 16.03.21 18:14, Jakub Kicinski wrote:
> > On Tue, 16 Mar 2021 11:01:41 +0100 Stefan Assmann wrote:  
> >> To avoid races between iavf_init_task(), iavf_reset_task(),
> >> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
> >> remove functions more locking is required.
> >> The current protection by __IAVF_IN_CRITICAL_TASK is needed in
> >> additional places.
> >>
> >> - The reset task performs state transitions, therefore needs locking.
> >> - The adminq task acts on replies from the PF in
> >>   iavf_virtchnl_completion() which may alter the states.
> >> - The init task is not only run during probe but also if a VF gets stuck
> >>   to reinitialize it.
> >> - The shutdown function performs a state transition.
> >> - The remove function perorms a state transition and also free's
> >>   resources.
> >>
> >> iavf_lock_timeout() is introduced to avoid waiting infinitely
> >> and cause a deadlock. Rather unlock and print a warning.
> >>
> >> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>  
> > 
> > I personally think that the overuse of flags in Intel drivers brings
> > nothing but trouble. At which point does it make sense to just add a
> > lock / semaphore here rather than open code all this with no clear
> > semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
> > all the uses look like poor man's locking at a quick grep. What am I
> > missing?
> 
> I agree with you that the locking could be done with other locking
> mechanisms just as good. I didn't invent the current method so I'll let
> Intel comment on that part, but I'd like to point out that what I'm
> making use of is fixing what is currently in the driver.

Right, I should have made it clear that I don't blame you for the
current state of things. Would you mind sending a patch on top of 
this one to do a conversion to a semaphore? 

Intel folks any opinions?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-16 20:29       ` Jakub Kicinski
  0 siblings, 0 replies; 18+ messages in thread
From: Jakub Kicinski @ 2021-03-16 20:29 UTC (permalink / raw)
  To: intel-wired-lan

On Tue, 16 Mar 2021 18:27:10 +0100 Stefan Assmann wrote:
> On 16.03.21 18:14, Jakub Kicinski wrote:
> > On Tue, 16 Mar 2021 11:01:41 +0100 Stefan Assmann wrote:  
> >> To avoid races between iavf_init_task(), iavf_reset_task(),
> >> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
> >> remove functions more locking is required.
> >> The current protection by __IAVF_IN_CRITICAL_TASK is needed in
> >> additional places.
> >>
> >> - The reset task performs state transitions, therefore needs locking.
> >> - The adminq task acts on replies from the PF in
> >>   iavf_virtchnl_completion() which may alter the states.
> >> - The init task is not only run during probe but also if a VF gets stuck
> >>   to reinitialize it.
> >> - The shutdown function performs a state transition.
> >> - The remove function perorms a state transition and also free's
> >>   resources.
> >>
> >> iavf_lock_timeout() is introduced to avoid waiting infinitely
> >> and cause a deadlock. Rather unlock and print a warning.
> >>
> >> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>  
> > 
> > I personally think that the overuse of flags in Intel drivers brings
> > nothing but trouble. At which point does it make sense to just add a
> > lock / semaphore here rather than open code all this with no clear
> > semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
> > all the uses look like poor man's locking at a quick grep. What am I
> > missing?
> 
> I agree with you that the locking could be done with other locking
> mechanisms just as good. I didn't invent the current method so I'll let
> Intel comment on that part, but I'd like to point out that what I'm
> making use of is fixing what is currently in the driver.

Right, I should have made it clear that I don't blame you for the
current state of things. Would you mind sending a patch on top of 
this one to do a conversion to a semaphore? 

Intel folks any opinions?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] iavf: fix locking of critical sections
  2021-03-16 20:29       ` [Intel-wired-lan] " Jakub Kicinski
@ 2021-03-16 22:02         ` Jesse Brandeburg
  -1 siblings, 0 replies; 18+ messages in thread
From: Jesse Brandeburg @ 2021-03-16 22:02 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Stefan Assmann, intel-wired-lan, netdev, anthony.l.nguyen,
	lihong.yang, slawomirx.laba, nicholas.d.nunley

Jakub Kicinski wrote:
> > > I personally think that the overuse of flags in Intel drivers brings
> > > nothing but trouble. At which point does it make sense to just add a
> > > lock / semaphore here rather than open code all this with no clear
> > > semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
> > > all the uses look like poor man's locking at a quick grep. What am I
> > > missing?
> > 
> > I agree with you that the locking could be done with other locking
> > mechanisms just as good. I didn't invent the current method so I'll let
> > Intel comment on that part, but I'd like to point out that what I'm
> > making use of is fixing what is currently in the driver.
> 
> Right, I should have made it clear that I don't blame you for the
> current state of things. Would you mind sending a patch on top of 
> this one to do a conversion to a semaphore? 
> 
> Intel folks any opinions?

I know Slawomir has been working closely with Stefan on figuring out
the right ways to fix this code.  Hopefully he can speak for himself,
but I know he's on Europe time.

As for conversion to mutexes I'm a big fan, and as long as we don't
have too many collisions with the RTNL lock I think it's a reasonable
improvement to do, and if Stefan doesn't want to work on it, we can
look into whether Slawomir or his team can.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-16 22:02         ` Jesse Brandeburg
  0 siblings, 0 replies; 18+ messages in thread
From: Jesse Brandeburg @ 2021-03-16 22:02 UTC (permalink / raw)
  To: intel-wired-lan

Jakub Kicinski wrote:
> > > I personally think that the overuse of flags in Intel drivers brings
> > > nothing but trouble. At which point does it make sense to just add a
> > > lock / semaphore here rather than open code all this with no clear
> > > semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
> > > all the uses look like poor man's locking at a quick grep. What am I
> > > missing?
> > 
> > I agree with you that the locking could be done with other locking
> > mechanisms just as good. I didn't invent the current method so I'll let
> > Intel comment on that part, but I'd like to point out that what I'm
> > making use of is fixing what is currently in the driver.
> 
> Right, I should have made it clear that I don't blame you for the
> current state of things. Would you mind sending a patch on top of 
> this one to do a conversion to a semaphore? 
> 
> Intel folks any opinions?

I know Slawomir has been working closely with Stefan on figuring out
the right ways to fix this code.  Hopefully he can speak for himself,
but I know he's on Europe time.

As for conversion to mutexes I'm a big fan, and as long as we don't
have too many collisions with the RTNL lock I think it's a reasonable
improvement to do, and if Stefan doesn't want to work on it, we can
look into whether Slawomir or his team can.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] iavf: fix locking of critical sections
  2021-03-16 22:02         ` [Intel-wired-lan] " Jesse Brandeburg
@ 2021-03-17  7:49           ` Stefan Assmann
  -1 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-17  7:49 UTC (permalink / raw)
  To: Jesse Brandeburg, Jakub Kicinski
  Cc: intel-wired-lan, netdev, anthony.l.nguyen, lihong.yang,
	slawomirx.laba, nicholas.d.nunley

On 16.03.21 23:02, Jesse Brandeburg wrote:
> Jakub Kicinski wrote:
>>>> I personally think that the overuse of flags in Intel drivers brings
>>>> nothing but trouble. At which point does it make sense to just add a
>>>> lock / semaphore here rather than open code all this with no clear
>>>> semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
>>>> all the uses look like poor man's locking at a quick grep. What am I
>>>> missing?
>>>
>>> I agree with you that the locking could be done with other locking
>>> mechanisms just as good. I didn't invent the current method so I'll let
>>> Intel comment on that part, but I'd like to point out that what I'm
>>> making use of is fixing what is currently in the driver.
>>
>> Right, I should have made it clear that I don't blame you for the
>> current state of things. Would you mind sending a patch on top of 
>> this one to do a conversion to a semaphore? 

Sure, I'm happy to help working on the conversion once the current issue
is resolved.

>> Intel folks any opinions?
> 
> I know Slawomir has been working closely with Stefan on figuring out
> the right ways to fix this code.  Hopefully he can speak for himself,
> but I know he's on Europe time.
> 
> As for conversion to mutexes I'm a big fan, and as long as we don't
> have too many collisions with the RTNL lock I think it's a reasonable
> improvement to do, and if Stefan doesn't want to work on it, we can
> look into whether Slawomir or his team can.

I'd appreciate to be involved.
Thanks!

  Stefan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-17  7:49           ` Stefan Assmann
  0 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-17  7:49 UTC (permalink / raw)
  To: intel-wired-lan

On 16.03.21 23:02, Jesse Brandeburg wrote:
> Jakub Kicinski wrote:
>>>> I personally think that the overuse of flags in Intel drivers brings
>>>> nothing but trouble. At which point does it make sense to just add a
>>>> lock / semaphore here rather than open code all this with no clear
>>>> semantics? No code seems to just test the __IAVF_IN_CRITICAL_TASK flag,
>>>> all the uses look like poor man's locking at a quick grep. What am I
>>>> missing?
>>>
>>> I agree with you that the locking could be done with other locking
>>> mechanisms just as good. I didn't invent the current method so I'll let
>>> Intel comment on that part, but I'd like to point out that what I'm
>>> making use of is fixing what is currently in the driver.
>>
>> Right, I should have made it clear that I don't blame you for the
>> current state of things. Would you mind sending a patch on top of 
>> this one to do a conversion to a semaphore? 

Sure, I'm happy to help working on the conversion once the current issue
is resolved.

>> Intel folks any opinions?
> 
> I know Slawomir has been working closely with Stefan on figuring out
> the right ways to fix this code.  Hopefully he can speak for himself,
> but I know he's on Europe time.
> 
> As for conversion to mutexes I'm a big fan, and as long as we don't
> have too many collisions with the RTNL lock I think it's a reasonable
> improvement to do, and if Stefan doesn't want to work on it, we can
> look into whether Slawomir or his team can.

I'd appreciate to be involved.
Thanks!

  Stefan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] iavf: fix locking of critical sections
  2021-03-17  7:49           ` [Intel-wired-lan] " Stefan Assmann
@ 2021-03-17 17:27             ` Laba, SlawomirX
  -1 siblings, 0 replies; 18+ messages in thread
From: Laba, SlawomirX @ 2021-03-17 17:27 UTC (permalink / raw)
  To: Stefan Assmann, Brandeburg, Jesse, Jakub Kicinski
  Cc: intel-wired-lan, netdev, Nguyen, Anthony L, Yang, Lihong, Nunley,
	Nicholas D

We were discussing introducing mutexes in those critical spots for a long time now (in my team).
Stefan, if you find time, you are most welcome to offer your solution with mutexes.

Slawek


-----Original Message-----
From: Stefan Assmann <sassmann@kpanic.de> 
Sent: Wednesday, March 17, 2021 8:50 AM
To: Brandeburg, Jesse <jesse.brandeburg@intel.com>; Jakub Kicinski <kuba@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Yang, Lihong <lihong.yang@intel.com>; Laba, SlawomirX <slawomirx.laba@intel.com>; Nunley, Nicholas D <nicholas.d.nunley@intel.com>
Subject: Re: [PATCH] iavf: fix locking of critical sections

On 16.03.21 23:02, Jesse Brandeburg wrote:
> Jakub Kicinski wrote:
>>>> I personally think that the overuse of flags in Intel drivers 
>>>> brings nothing but trouble. At which point does it make sense to 
>>>> just add a lock / semaphore here rather than open code all this 
>>>> with no clear semantics? No code seems to just test the 
>>>> __IAVF_IN_CRITICAL_TASK flag, all the uses look like poor man's 
>>>> locking at a quick grep. What am I missing?
>>>
>>> I agree with you that the locking could be done with other locking 
>>> mechanisms just as good. I didn't invent the current method so I'll 
>>> let Intel comment on that part, but I'd like to point out that what 
>>> I'm making use of is fixing what is currently in the driver.
>>
>> Right, I should have made it clear that I don't blame you for the 
>> current state of things. Would you mind sending a patch on top of 
>> this one to do a conversion to a semaphore?

Sure, I'm happy to help working on the conversion once the current issue is resolved.

>> Intel folks any opinions?
> 
> I know Slawomir has been working closely with Stefan on figuring out 
> the right ways to fix this code.  Hopefully he can speak for himself, 
> but I know he's on Europe time.
> 
> As for conversion to mutexes I'm a big fan, and as long as we don't 
> have too many collisions with the RTNL lock I think it's a reasonable 
> improvement to do, and if Stefan doesn't want to work on it, we can 
> look into whether Slawomir or his team can.

I'd appreciate to be involved.
Thanks!

  Stefan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-17 17:27             ` Laba, SlawomirX
  0 siblings, 0 replies; 18+ messages in thread
From: Laba, SlawomirX @ 2021-03-17 17:27 UTC (permalink / raw)
  To: intel-wired-lan

We were discussing introducing mutexes in those critical spots for a long time now (in my team).
Stefan, if you find time, you are most welcome to offer your solution with mutexes.

Slawek


-----Original Message-----
From: Stefan Assmann <sassmann@kpanic.de> 
Sent: Wednesday, March 17, 2021 8:50 AM
To: Brandeburg, Jesse <jesse.brandeburg@intel.com>; Jakub Kicinski <kuba@kernel.org>
Cc: intel-wired-lan at lists.osuosl.org; netdev at vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Yang, Lihong <lihong.yang@intel.com>; Laba, SlawomirX <slawomirx.laba@intel.com>; Nunley, Nicholas D <nicholas.d.nunley@intel.com>
Subject: Re: [PATCH] iavf: fix locking of critical sections

On 16.03.21 23:02, Jesse Brandeburg wrote:
> Jakub Kicinski wrote:
>>>> I personally think that the overuse of flags in Intel drivers 
>>>> brings nothing but trouble. At which point does it make sense to 
>>>> just add a lock / semaphore here rather than open code all this 
>>>> with no clear semantics? No code seems to just test the 
>>>> __IAVF_IN_CRITICAL_TASK flag, all the uses look like poor man's 
>>>> locking at a quick grep. What am I missing?
>>>
>>> I agree with you that the locking could be done with other locking 
>>> mechanisms just as good. I didn't invent the current method so I'll 
>>> let Intel comment on that part, but I'd like to point out that what 
>>> I'm making use of is fixing what is currently in the driver.
>>
>> Right, I should have made it clear that I don't blame you for the 
>> current state of things. Would you mind sending a patch on top of 
>> this one to do a conversion to a semaphore?

Sure, I'm happy to help working on the conversion once the current issue is resolved.

>> Intel folks any opinions?
> 
> I know Slawomir has been working closely with Stefan on figuring out 
> the right ways to fix this code.  Hopefully he can speak for himself, 
> but I know he's on Europe time.
> 
> As for conversion to mutexes I'm a big fan, and as long as we don't 
> have too many collisions with the RTNL lock I think it's a reasonable 
> improvement to do, and if Stefan doesn't want to work on it, we can 
> look into whether Slawomir or his team can.

I'd appreciate to be involved.
Thanks!

  Stefan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] iavf: fix locking of critical sections
  2021-03-17 17:27             ` [Intel-wired-lan] " Laba, SlawomirX
@ 2021-03-17 18:14               ` Stefan Assmann
  -1 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-17 18:14 UTC (permalink / raw)
  To: Laba, SlawomirX, Brandeburg, Jesse, Jakub Kicinski
  Cc: intel-wired-lan, netdev, Nguyen, Anthony L, Yang, Lihong, Nunley,
	Nicholas D

On 17.03.21 18:27, Laba, SlawomirX wrote:
> We were discussing introducing mutexes in those critical spots for a long time now (in my team).
> Stefan, if you find time, you are most welcome to offer your solution with mutexes.

Hi Slawek,

I'll work on that conversion once the current patch went through Intel
testing and is merged. Smaller patches, smaller steps are usually
better, so let's make one change at a time.

Thanks!

  Stefan

> Slawek
> 
> 
> -----Original Message-----
> From: Stefan Assmann <sassmann@kpanic.de> 
> Sent: Wednesday, March 17, 2021 8:50 AM
> To: Brandeburg, Jesse <jesse.brandeburg@intel.com>; Jakub Kicinski <kuba@kernel.org>
> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Yang, Lihong <lihong.yang@intel.com>; Laba, SlawomirX <slawomirx.laba@intel.com>; Nunley, Nicholas D <nicholas.d.nunley@intel.com>
> Subject: Re: [PATCH] iavf: fix locking of critical sections
> 
> On 16.03.21 23:02, Jesse Brandeburg wrote:
>> Jakub Kicinski wrote:
>>>>> I personally think that the overuse of flags in Intel drivers 
>>>>> brings nothing but trouble. At which point does it make sense to 
>>>>> just add a lock / semaphore here rather than open code all this 
>>>>> with no clear semantics? No code seems to just test the 
>>>>> __IAVF_IN_CRITICAL_TASK flag, all the uses look like poor man's 
>>>>> locking at a quick grep. What am I missing?
>>>>
>>>> I agree with you that the locking could be done with other locking 
>>>> mechanisms just as good. I didn't invent the current method so I'll 
>>>> let Intel comment on that part, but I'd like to point out that what 
>>>> I'm making use of is fixing what is currently in the driver.
>>>
>>> Right, I should have made it clear that I don't blame you for the 
>>> current state of things. Would you mind sending a patch on top of 
>>> this one to do a conversion to a semaphore?
> 
> Sure, I'm happy to help working on the conversion once the current issue is resolved.
> 
>>> Intel folks any opinions?
>>
>> I know Slawomir has been working closely with Stefan on figuring out 
>> the right ways to fix this code.  Hopefully he can speak for himself, 
>> but I know he's on Europe time.
>>
>> As for conversion to mutexes I'm a big fan, and as long as we don't 
>> have too many collisions with the RTNL lock I think it's a reasonable 
>> improvement to do, and if Stefan doesn't want to work on it, we can 
>> look into whether Slawomir or his team can.
> 
> I'd appreciate to be involved.
> Thanks!
> 
>   Stefan
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-03-17 18:14               ` Stefan Assmann
  0 siblings, 0 replies; 18+ messages in thread
From: Stefan Assmann @ 2021-03-17 18:14 UTC (permalink / raw)
  To: intel-wired-lan

On 17.03.21 18:27, Laba, SlawomirX wrote:
> We were discussing introducing mutexes in those critical spots for a long time now (in my team).
> Stefan, if you find time, you are most welcome to offer your solution with mutexes.

Hi Slawek,

I'll work on that conversion once the current patch went through Intel
testing and is merged. Smaller patches, smaller steps are usually
better, so let's make one change at a time.

Thanks!

  Stefan

> Slawek
> 
> 
> -----Original Message-----
> From: Stefan Assmann <sassmann@kpanic.de> 
> Sent: Wednesday, March 17, 2021 8:50 AM
> To: Brandeburg, Jesse <jesse.brandeburg@intel.com>; Jakub Kicinski <kuba@kernel.org>
> Cc: intel-wired-lan at lists.osuosl.org; netdev at vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Yang, Lihong <lihong.yang@intel.com>; Laba, SlawomirX <slawomirx.laba@intel.com>; Nunley, Nicholas D <nicholas.d.nunley@intel.com>
> Subject: Re: [PATCH] iavf: fix locking of critical sections
> 
> On 16.03.21 23:02, Jesse Brandeburg wrote:
>> Jakub Kicinski wrote:
>>>>> I personally think that the overuse of flags in Intel drivers 
>>>>> brings nothing but trouble. At which point does it make sense to 
>>>>> just add a lock / semaphore here rather than open code all this 
>>>>> with no clear semantics? No code seems to just test the 
>>>>> __IAVF_IN_CRITICAL_TASK flag, all the uses look like poor man's 
>>>>> locking at a quick grep. What am I missing?
>>>>
>>>> I agree with you that the locking could be done with other locking 
>>>> mechanisms just as good. I didn't invent the current method so I'll 
>>>> let Intel comment on that part, but I'd like to point out that what 
>>>> I'm making use of is fixing what is currently in the driver.
>>>
>>> Right, I should have made it clear that I don't blame you for the 
>>> current state of things. Would you mind sending a patch on top of 
>>> this one to do a conversion to a semaphore?
> 
> Sure, I'm happy to help working on the conversion once the current issue is resolved.
> 
>>> Intel folks any opinions?
>>
>> I know Slawomir has been working closely with Stefan on figuring out 
>> the right ways to fix this code.  Hopefully he can speak for himself, 
>> but I know he's on Europe time.
>>
>> As for conversion to mutexes I'm a big fan, and as long as we don't 
>> have too many collisions with the RTNL lock I think it's a reasonable 
>> improvement to do, and if Stefan doesn't want to work on it, we can 
>> look into whether Slawomir or his team can.
> 
> I'd appreciate to be involved.
> Thanks!
> 
>   Stefan
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
  2021-03-16 10:01 ` [Intel-wired-lan] " Stefan Assmann
@ 2021-07-06  8:07   ` Jankowski, Konrad0
  -1 siblings, 0 replies; 18+ messages in thread
From: Jankowski, Konrad0 @ 2021-07-06  8:07 UTC (permalink / raw)
  To: Stefan Assmann, intel-wired-lan; +Cc: Laba, SlawomirX, netdev



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of
> Stefan Assmann
> Sent: wtorek, 16 marca 2021 11:02
> To: intel-wired-lan@lists.osuosl.org
> Cc: Laba, SlawomirX <slawomirx.laba@intel.com>; netdev@vger.kernel.org;
> sassmann@kpanic.de
> Subject: [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
> 
> To avoid races between iavf_init_task(), iavf_reset_task(),
> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
> remove functions more locking is required.
> The current protection by __IAVF_IN_CRITICAL_TASK is needed in additional
> places.
> 
> - The reset task performs state transitions, therefore needs locking.
> - The adminq task acts on replies from the PF in
>   iavf_virtchnl_completion() which may alter the states.
> - The init task is not only run during probe but also if a VF gets stuck
>   to reinitialize it.
> - The shutdown function performs a state transition.
> - The remove function perorms a state transition and also free's
>   resources.
> 
> iavf_lock_timeout() is introduced to avoid waiting infinitely and cause a
> deadlock. Rather unlock and print a warning.
> 
> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
> ---
>  drivers/net/ethernet/intel/iavf/iavf_main.c | 57 ++++++++++++++++++---
>  1 file changed, 50 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c
> b/drivers/net/ethernet/intel/iavf/iavf_main.c
> index dc5b3c06d1e0..538b7aa43fa5 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_main.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
> @@ -131,6 +131,30 @@ enum iavf_status iavf_free_virt_mem_d(struct
> iavf_hw *hw,
>  	return 0;
>  }
> 
> +/**
> + * iavf_timeout - try to set bit but give up after timeout
> + * @adapter: board private structure
> + * @bit: bit to set
> + * @msecs: timeout in msecs
> + *
> + * Returns 0 on success, negative on failure  **/ static inline int
> +iavf_lock_timeout(struct iavf_adapter *adapter,
> +				    enum iavf_critical_section_t bit,
> +				    unsigned int msecs)
> +{
> +	unsigned int wait, delay = 10;
> +
> +	for (wait = 0; wait < msecs; wait += delay) {
> +		if (!test_and_set_bit(bit, &adapter->crit_section))
> +			return 0;
> +
> +		msleep(delay);
> +	}
> +
> +	return -1;
> +}
> +
>  /**
>   * iavf_schedule_reset - Set the flags and schedule a reset event
>   * @adapter: board private structure
> @@ -2069,6 +2093,10 @@ static void iavf_reset_task(struct work_struct
> *work)
>  	if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
>  		return;
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) {
> +		schedule_work(&adapter->reset_task);
> +		return;
> +	}
>  	while (test_and_set_bit(__IAVF_IN_CLIENT_TASK,
>  				&adapter->crit_section))
>  		usleep_range(500, 1000);
> @@ -2275,6 +2303,8 @@ static void iavf_adminq_task(struct work_struct
> *work)
>  	if (!event.msg_buf)
>  		goto out;
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200))
> +		goto freedom;
>  	do {
>  		ret = iavf_clean_arq_element(hw, &event, &pending);
>  		v_op = (enum
> virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
> @@ -2288,6 +2318,7 @@ static void iavf_adminq_task(struct work_struct
> *work)
>  		if (pending != 0)
>  			memset(event.msg_buf, 0,
> IAVF_MAX_AQ_BUF_SIZE);
>  	} while (pending);
> +	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
> 
>  	if ((adapter->flags &
>  	     (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
> @@ -3590,6 +3621,10 @@ static void iavf_init_task(struct work_struct
> *work)
>  						    init_task.work);
>  	struct iavf_hw *hw = &adapter->hw;
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) {
> +		dev_warn(&adapter->pdev->dev, "failed to set
> __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
> +		return;
> +	}
>  	switch (adapter->state) {
>  	case __IAVF_STARTUP:
>  		if (iavf_startup(adapter) < 0)
> @@ -3602,14 +3637,14 @@ static void iavf_init_task(struct work_struct
> *work)
>  	case __IAVF_INIT_GET_RESOURCES:
>  		if (iavf_init_get_resources(adapter) < 0)
>  			goto init_failed;
> -		return;
> +		goto out;
>  	default:
>  		goto init_failed;
>  	}
> 
>  	queue_delayed_work(iavf_wq, &adapter->init_task,
>  			   msecs_to_jiffies(30));
> -	return;
> +	goto out;
>  init_failed:
>  	if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
>  		dev_err(&adapter->pdev->dev,
> @@ -3618,9 +3653,11 @@ static void iavf_init_task(struct work_struct
> *work)
>  		iavf_shutdown_adminq(hw);
>  		adapter->state = __IAVF_STARTUP;
>  		queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
> -		return;
> +		goto out;
>  	}
>  	queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
> +out:
> +	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
>  }
> 
>  /**
> @@ -3637,9 +3674,12 @@ static void iavf_shutdown(struct pci_dev *pdev)
>  	if (netif_running(netdev))
>  		iavf_close(netdev);
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
> +		dev_warn(&adapter->pdev->dev, "failed to set
> __IAVF_IN_CRITICAL_TASK
> +in %s\n", __FUNCTION__);
>  	/* Prevent the watchdog from running. */
>  	adapter->state = __IAVF_REMOVE;
>  	adapter->aq_required = 0;
> +	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
> 
>  #ifdef CONFIG_PM
>  	pci_save_state(pdev);
> @@ -3866,10 +3906,6 @@ static void iavf_remove(struct pci_dev *pdev)
>  				 err);
>  	}
> 
> -	/* Shut down all the garbage mashers on the detention level */
> -	adapter->state = __IAVF_REMOVE;
> -	adapter->aq_required = 0;
> -	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
>  	iavf_request_reset(adapter);
>  	msleep(50);
>  	/* If the FW isn't responding, kick it once, but only once. */ @@ -
> 3877,6 +3913,13 @@ static void iavf_remove(struct pci_dev *pdev)
>  		iavf_request_reset(adapter);
>  		msleep(50);
>  	}
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
> +		dev_warn(&adapter->pdev->dev, "failed to set
> __IAVF_IN_CRITICAL_TASK
> +in %s\n", __FUNCTION__);
> +
> +	/* Shut down all the garbage mashers on the detention level */
> +	adapter->state = __IAVF_REMOVE;
> +	adapter->aq_required = 0;
> +	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
>  	iavf_free_all_tx_resources(adapter);
>  	iavf_free_all_rx_resources(adapter);
>  	iavf_misc_irq_disable(adapter);
> --
> 2.29.2

Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
@ 2021-07-06  8:07   ` Jankowski, Konrad0
  0 siblings, 0 replies; 18+ messages in thread
From: Jankowski, Konrad0 @ 2021-07-06  8:07 UTC (permalink / raw)
  To: intel-wired-lan



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of
> Stefan Assmann
> Sent: wtorek, 16 marca 2021 11:02
> To: intel-wired-lan at lists.osuosl.org
> Cc: Laba, SlawomirX <slawomirx.laba@intel.com>; netdev at vger.kernel.org;
> sassmann at kpanic.de
> Subject: [Intel-wired-lan] [PATCH] iavf: fix locking of critical sections
> 
> To avoid races between iavf_init_task(), iavf_reset_task(),
> iavf_watchdog_task(), iavf_adminq_task() as well as the shutdown and
> remove functions more locking is required.
> The current protection by __IAVF_IN_CRITICAL_TASK is needed in additional
> places.
> 
> - The reset task performs state transitions, therefore needs locking.
> - The adminq task acts on replies from the PF in
>   iavf_virtchnl_completion() which may alter the states.
> - The init task is not only run during probe but also if a VF gets stuck
>   to reinitialize it.
> - The shutdown function performs a state transition.
> - The remove function perorms a state transition and also free's
>   resources.
> 
> iavf_lock_timeout() is introduced to avoid waiting infinitely and cause a
> deadlock. Rather unlock and print a warning.
> 
> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
> ---
>  drivers/net/ethernet/intel/iavf/iavf_main.c | 57 ++++++++++++++++++---
>  1 file changed, 50 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c
> b/drivers/net/ethernet/intel/iavf/iavf_main.c
> index dc5b3c06d1e0..538b7aa43fa5 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_main.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
> @@ -131,6 +131,30 @@ enum iavf_status iavf_free_virt_mem_d(struct
> iavf_hw *hw,
>  	return 0;
>  }
> 
> +/**
> + * iavf_timeout - try to set bit but give up after timeout
> + * @adapter: board private structure
> + * @bit: bit to set
> + * @msecs: timeout in msecs
> + *
> + * Returns 0 on success, negative on failure  **/ static inline int
> +iavf_lock_timeout(struct iavf_adapter *adapter,
> +				    enum iavf_critical_section_t bit,
> +				    unsigned int msecs)
> +{
> +	unsigned int wait, delay = 10;
> +
> +	for (wait = 0; wait < msecs; wait += delay) {
> +		if (!test_and_set_bit(bit, &adapter->crit_section))
> +			return 0;
> +
> +		msleep(delay);
> +	}
> +
> +	return -1;
> +}
> +
>  /**
>   * iavf_schedule_reset - Set the flags and schedule a reset event
>   * @adapter: board private structure
> @@ -2069,6 +2093,10 @@ static void iavf_reset_task(struct work_struct
> *work)
>  	if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
>  		return;
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) {
> +		schedule_work(&adapter->reset_task);
> +		return;
> +	}
>  	while (test_and_set_bit(__IAVF_IN_CLIENT_TASK,
>  				&adapter->crit_section))
>  		usleep_range(500, 1000);
> @@ -2275,6 +2303,8 @@ static void iavf_adminq_task(struct work_struct
> *work)
>  	if (!event.msg_buf)
>  		goto out;
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200))
> +		goto freedom;
>  	do {
>  		ret = iavf_clean_arq_element(hw, &event, &pending);
>  		v_op = (enum
> virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
> @@ -2288,6 +2318,7 @@ static void iavf_adminq_task(struct work_struct
> *work)
>  		if (pending != 0)
>  			memset(event.msg_buf, 0,
> IAVF_MAX_AQ_BUF_SIZE);
>  	} while (pending);
> +	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
> 
>  	if ((adapter->flags &
>  	     (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
> @@ -3590,6 +3621,10 @@ static void iavf_init_task(struct work_struct
> *work)
>  						    init_task.work);
>  	struct iavf_hw *hw = &adapter->hw;
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) {
> +		dev_warn(&adapter->pdev->dev, "failed to set
> __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__);
> +		return;
> +	}
>  	switch (adapter->state) {
>  	case __IAVF_STARTUP:
>  		if (iavf_startup(adapter) < 0)
> @@ -3602,14 +3637,14 @@ static void iavf_init_task(struct work_struct
> *work)
>  	case __IAVF_INIT_GET_RESOURCES:
>  		if (iavf_init_get_resources(adapter) < 0)
>  			goto init_failed;
> -		return;
> +		goto out;
>  	default:
>  		goto init_failed;
>  	}
> 
>  	queue_delayed_work(iavf_wq, &adapter->init_task,
>  			   msecs_to_jiffies(30));
> -	return;
> +	goto out;
>  init_failed:
>  	if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
>  		dev_err(&adapter->pdev->dev,
> @@ -3618,9 +3653,11 @@ static void iavf_init_task(struct work_struct
> *work)
>  		iavf_shutdown_adminq(hw);
>  		adapter->state = __IAVF_STARTUP;
>  		queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
> -		return;
> +		goto out;
>  	}
>  	queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
> +out:
> +	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
>  }
> 
>  /**
> @@ -3637,9 +3674,12 @@ static void iavf_shutdown(struct pci_dev *pdev)
>  	if (netif_running(netdev))
>  		iavf_close(netdev);
> 
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
> +		dev_warn(&adapter->pdev->dev, "failed to set
> __IAVF_IN_CRITICAL_TASK
> +in %s\n", __FUNCTION__);
>  	/* Prevent the watchdog from running. */
>  	adapter->state = __IAVF_REMOVE;
>  	adapter->aq_required = 0;
> +	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
> 
>  #ifdef CONFIG_PM
>  	pci_save_state(pdev);
> @@ -3866,10 +3906,6 @@ static void iavf_remove(struct pci_dev *pdev)
>  				 err);
>  	}
> 
> -	/* Shut down all the garbage mashers on the detention level */
> -	adapter->state = __IAVF_REMOVE;
> -	adapter->aq_required = 0;
> -	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
>  	iavf_request_reset(adapter);
>  	msleep(50);
>  	/* If the FW isn't responding, kick it once, but only once. */ @@ -
> 3877,6 +3913,13 @@ static void iavf_remove(struct pci_dev *pdev)
>  		iavf_request_reset(adapter);
>  		msleep(50);
>  	}
> +	if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000))
> +		dev_warn(&adapter->pdev->dev, "failed to set
> __IAVF_IN_CRITICAL_TASK
> +in %s\n", __FUNCTION__);
> +
> +	/* Shut down all the garbage mashers on the detention level */
> +	adapter->state = __IAVF_REMOVE;
> +	adapter->aq_required = 0;
> +	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
>  	iavf_free_all_tx_resources(adapter);
>  	iavf_free_all_rx_resources(adapter);
>  	iavf_misc_irq_disable(adapter);
> --
> 2.29.2

Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-07-06  8:07 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-16 10:01 [PATCH] iavf: fix locking of critical sections Stefan Assmann
2021-03-16 10:01 ` [Intel-wired-lan] " Stefan Assmann
2021-03-16 17:14 ` Jakub Kicinski
2021-03-16 17:14   ` [Intel-wired-lan] " Jakub Kicinski
2021-03-16 17:27   ` Stefan Assmann
2021-03-16 17:27     ` [Intel-wired-lan] " Stefan Assmann
2021-03-16 20:29     ` Jakub Kicinski
2021-03-16 20:29       ` [Intel-wired-lan] " Jakub Kicinski
2021-03-16 22:02       ` Jesse Brandeburg
2021-03-16 22:02         ` [Intel-wired-lan] " Jesse Brandeburg
2021-03-17  7:49         ` Stefan Assmann
2021-03-17  7:49           ` [Intel-wired-lan] " Stefan Assmann
2021-03-17 17:27           ` Laba, SlawomirX
2021-03-17 17:27             ` [Intel-wired-lan] " Laba, SlawomirX
2021-03-17 18:14             ` Stefan Assmann
2021-03-17 18:14               ` [Intel-wired-lan] " Stefan Assmann
2021-07-06  8:07 ` Jankowski, Konrad0
2021-07-06  8:07   ` Jankowski, Konrad0

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.