linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] devlink: Execute devlink health recover as a work
@ 2019-04-25 10:57 Moshe Shemesh
  2019-04-25 18:11 ` Saeed Mahameed
  2019-04-25 21:38 ` Jakub Kicinski
  0 siblings, 2 replies; 9+ messages in thread
From: Moshe Shemesh @ 2019-04-25 10:57 UTC (permalink / raw)
  To: David S. Miller; +Cc: Jiri Pirko, netdev, linux-kernel, Moshe Shemesh

Different reporters have different rules in the driver and are being
created/destroyed during different stages of driver load/unload/running.
So during execution of a reporter recover the flow can go through
another reporter's destroy and create. Such flow leads to deadlock
trying to lock a mutex already held if the flow was triggered by devlink
recover command.
To avoid such deadlock, we execute the recover flow from a workqueue.
Once the recover work is done successfully the reporter health state and
recover counter are being updated.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/devlink.h |  1 +
 net/core/devlink.c    | 70 +++++++++++++++++++++++++++++++++++----------------
 2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 4f5e416..820b327 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -32,6 +32,7 @@ struct devlink {
 	struct list_head region_list;
 	u32 snapshot_id;
 	struct list_head reporter_list;
+	struct workqueue_struct *reporters_wq;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7b91605..8ee380e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4422,6 +4422,7 @@ struct devlink_health_reporter {
 	u64 error_count;
 	u64 recovery_count;
 	u64 last_recovery_ts;
+	struct work_struct recover_work;
 };
 
 void *
@@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
 	return NULL;
 }
 
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+				void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	err = reporter->ops->recover(reporter, priv_ctx);
+	if (err)
+		return err;
+
+	reporter->recovery_count++;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+	reporter->last_recovery_ts = jiffies;
+
+	trace_devlink_health_reporter_state_update(reporter->devlink,
+						   reporter->ops->name,
+						   reporter->health_state);
+	return 0;
+}
+
+static void
+devlink_health_reporter_recover_work(struct work_struct *work)
+{
+	struct devlink_health_reporter *reporter;
+
+	reporter = container_of(work, struct devlink_health_reporter,
+				recover_work);
+
+	devlink_health_reporter_recover(reporter, NULL);
+}
+
 /**
  *	devlink_health_reporter_create - create devlink health reporter
  *
@@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
 	reporter->devlink = devlink;
 	reporter->graceful_period = graceful_period;
 	reporter->auto_recover = auto_recover;
+	INIT_WORK(&reporter->recover_work,
+		  devlink_health_reporter_recover_work);
 	mutex_init(&reporter->dump_lock);
 	list_add_tail(&reporter->list, &devlink->reporter_list);
 unlock:
@@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
 	mutex_unlock(&reporter->devlink->lock);
 	if (reporter->dump_fmsg)
 		devlink_fmsg_free(reporter->dump_fmsg);
+	cancel_work_sync(&reporter->recover_work);
 	kfree(reporter);
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
@@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
 
-static int
-devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
-				void *priv_ctx)
-{
-	int err;
-
-	if (!reporter->ops->recover)
-		return -EOPNOTSUPP;
-
-	err = reporter->ops->recover(reporter, priv_ctx);
-	if (err)
-		return err;
-
-	reporter->recovery_count++;
-	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
-	reporter->last_recovery_ts = jiffies;
-
-	return 0;
-}
-
 static void
 devlink_health_dump_clear(struct devlink_health_reporter *reporter)
 {
@@ -4813,7 +4831,11 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
 	if (!reporter)
 		return -EINVAL;
 
-	return devlink_health_reporter_recover(reporter, NULL);
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	queue_work(devlink->reporters_wq, &reporter->recover_work);
+	return 0;
 }
 
 static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
@@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
 	INIT_LIST_HEAD(&devlink->reporter_list);
+	devlink->reporters_wq = create_singlethread_workqueue("devlink_reporters");
+	if (!devlink->reporters_wq) {
+		kfree(devlink);
+		return NULL;
+	}
 	mutex_init(&devlink->lock);
 	return devlink;
 }
@@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink *devlink)
 void devlink_free(struct devlink *devlink)
 {
 	mutex_destroy(&devlink->lock);
+	destroy_workqueue(devlink->reporters_wq);
 	WARN_ON(!list_empty(&devlink->reporter_list));
 	WARN_ON(!list_empty(&devlink->region_list));
 	WARN_ON(!list_empty(&devlink->param_list));
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-25 10:57 [PATCH net-next] devlink: Execute devlink health recover as a work Moshe Shemesh
@ 2019-04-25 18:11 ` Saeed Mahameed
  2019-04-25 21:38 ` Jakub Kicinski
  1 sibling, 0 replies; 9+ messages in thread
From: Saeed Mahameed @ 2019-04-25 18:11 UTC (permalink / raw)
  To: davem, Moshe Shemesh; +Cc: netdev, Jiri Pirko, linux-kernel

On Thu, 2019-04-25 at 13:57 +0300, Moshe Shemesh wrote:
> Different reporters have different rules in the driver and are being
> created/destroyed during different stages of driver
> load/unload/running.
> So during execution of a reporter recover the flow can go through
> another reporter's destroy and create. Such flow leads to deadlock
> trying to lock a mutex already held if the flow was triggered by
> devlink
> recover command.
> To avoid such deadlock, we execute the recover flow from a workqueue.
> Once the recover work is done successfully the reporter health state
> and
> recover counter are being updated.
> 
> Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>

> ---
>  include/net/devlink.h |  1 +
>  net/core/devlink.c    | 70 +++++++++++++++++++++++++++++++++++----
> ------------
>  2 files changed, 50 insertions(+), 21 deletions(-)
> 
> diff --git a/include/net/devlink.h b/include/net/devlink.h
> index 4f5e416..820b327 100644
> --- a/include/net/devlink.h
> +++ b/include/net/devlink.h
> @@ -32,6 +32,7 @@ struct devlink {
>  	struct list_head region_list;
>  	u32 snapshot_id;
>  	struct list_head reporter_list;
> +	struct workqueue_struct *reporters_wq;
>  	struct devlink_dpipe_headers *dpipe_headers;
>  	const struct devlink_ops *ops;
>  	struct device *dev;
> diff --git a/net/core/devlink.c b/net/core/devlink.c
> index 7b91605..8ee380e 100644
> --- a/net/core/devlink.c
> +++ b/net/core/devlink.c
> @@ -4422,6 +4422,7 @@ struct devlink_health_reporter {
>  	u64 error_count;
>  	u64 recovery_count;
>  	u64 last_recovery_ts;
> +	struct work_struct recover_work;
>  };
>  
>  void *
> @@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
>  	return NULL;
>  }
>  
> +static int
> +devlink_health_reporter_recover(struct devlink_health_reporter
> *reporter,
> +				void *priv_ctx)
> +{
> +	int err;
> +
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	err = reporter->ops->recover(reporter, priv_ctx);
> +	if (err)
> +		return err;
> +
> +	reporter->recovery_count++;
> +	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> +	reporter->last_recovery_ts = jiffies;
> +
> +	trace_devlink_health_reporter_state_update(reporter->devlink,
> +						   reporter->ops->name,
> +						   reporter-
> >health_state);
> +	return 0;
> +}
> +
> +static void
> +devlink_health_reporter_recover_work(struct work_struct *work)
> +{
> +	struct devlink_health_reporter *reporter;
> +
> +	reporter = container_of(work, struct devlink_health_reporter,
> +				recover_work);
> +
> +	devlink_health_reporter_recover(reporter, NULL);
> +}
> +
>  /**
>   *	devlink_health_reporter_create - create devlink health reporter
>   *
> @@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
>  	reporter->devlink = devlink;
>  	reporter->graceful_period = graceful_period;
>  	reporter->auto_recover = auto_recover;
> +	INIT_WORK(&reporter->recover_work,
> +		  devlink_health_reporter_recover_work);
>  	mutex_init(&reporter->dump_lock);
>  	list_add_tail(&reporter->list, &devlink->reporter_list);
>  unlock:
> @@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
>  	mutex_unlock(&reporter->devlink->lock);
>  	if (reporter->dump_fmsg)
>  		devlink_fmsg_free(reporter->dump_fmsg);
> +	cancel_work_sync(&reporter->recover_work);
>  	kfree(reporter);
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
> @@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
>  
> -static int
> -devlink_health_reporter_recover(struct devlink_health_reporter
> *reporter,
> -				void *priv_ctx)
> -{
> -	int err;
> -
> -	if (!reporter->ops->recover)
> -		return -EOPNOTSUPP;
> -
> -	err = reporter->ops->recover(reporter, priv_ctx);
> -	if (err)
> -		return err;
> -
> -	reporter->recovery_count++;
> -	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> -	reporter->last_recovery_ts = jiffies;
> -
> -	return 0;
> -}
> -
>  static void
>  devlink_health_dump_clear(struct devlink_health_reporter *reporter)
>  {
> @@ -4813,7 +4831,11 @@ static int
> devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>  	if (!reporter)
>  		return -EINVAL;
>  
> -	return devlink_health_reporter_recover(reporter, NULL);
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> +	return 0;
>  }
>  
>  static int devlink_nl_cmd_health_reporter_diagnose_doit(struct
> sk_buff *skb,
> @@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct
> devlink_ops *ops, size_t priv_size)
>  	INIT_LIST_HEAD(&devlink->param_list);
>  	INIT_LIST_HEAD(&devlink->region_list);
>  	INIT_LIST_HEAD(&devlink->reporter_list);
> +	devlink->reporters_wq =
> create_singlethread_workqueue("devlink_reporters");
> +	if (!devlink->reporters_wq) {
> +		kfree(devlink);
> +		return NULL;
> +	}
>  	mutex_init(&devlink->lock);
>  	return devlink;
>  }
> @@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink
> *devlink)
>  void devlink_free(struct devlink *devlink)
>  {
>  	mutex_destroy(&devlink->lock);
> +	destroy_workqueue(devlink->reporters_wq);
>  	WARN_ON(!list_empty(&devlink->reporter_list));
>  	WARN_ON(!list_empty(&devlink->region_list));
>  	WARN_ON(!list_empty(&devlink->param_list));

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-25 10:57 [PATCH net-next] devlink: Execute devlink health recover as a work Moshe Shemesh
  2019-04-25 18:11 ` Saeed Mahameed
@ 2019-04-25 21:38 ` Jakub Kicinski
  2019-04-26  1:42   ` Saeed Mahameed
  2019-04-26  4:38   ` Jiri Pirko
  1 sibling, 2 replies; 9+ messages in thread
From: Jakub Kicinski @ 2019-04-25 21:38 UTC (permalink / raw)
  To: Moshe Shemesh; +Cc: David S. Miller, Jiri Pirko, netdev, linux-kernel

On Thu, 25 Apr 2019 13:57:03 +0300, Moshe Shemesh wrote:
> Different reporters have different rules in the driver and are being
> created/destroyed during different stages of driver load/unload/running.
> So during execution of a reporter recover the flow can go through
> another reporter's destroy and create. Such flow leads to deadlock
> trying to lock a mutex already held if the flow was triggered by devlink
> recover command.
> To avoid such deadlock, we execute the recover flow from a workqueue.
> Once the recover work is done successfully the reporter health state and
> recover counter are being updated.

Naive question, why not just run the doit unlocked?  Why the async?

> Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

One day we really gotta start documenting the context from which 
things are called and locks called when ops are invoked.. :)

> diff --git a/net/core/devlink.c b/net/core/devlink.c
> index 7b91605..8ee380e 100644
> --- a/net/core/devlink.c
> +++ b/net/core/devlink.c
> @@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
>  	return NULL;
>  }
>  
> +static int
> +devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> +				void *priv_ctx)
> +{
> +	int err;
> +
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	err = reporter->ops->recover(reporter, priv_ctx);
> +	if (err)
> +		return err;
> +
> +	reporter->recovery_count++;
> +	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> +	reporter->last_recovery_ts = jiffies;

Well, the dump looks at these without taking any locks..

> +	trace_devlink_health_reporter_state_update(reporter->devlink,
> +						   reporter->ops->name,
> +						   reporter->health_state);
> +	return 0;
> +}
> +
> +static void
> +devlink_health_reporter_recover_work(struct work_struct *work)
> +{
> +	struct devlink_health_reporter *reporter;
> +
> +	reporter = container_of(work, struct devlink_health_reporter,
> +				recover_work);
> +
> +	devlink_health_reporter_recover(reporter, NULL);
> +}
> +
>  /**
>   *	devlink_health_reporter_create - create devlink health reporter
>   *

> @@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
>  	reporter->devlink = devlink;
>  	reporter->graceful_period = graceful_period;
>  	reporter->auto_recover = auto_recover;
> +	INIT_WORK(&reporter->recover_work,
> +		  devlink_health_reporter_recover_work);
>  	mutex_init(&reporter->dump_lock);
>  	list_add_tail(&reporter->list, &devlink->reporter_list);
>  unlock:
> @@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
>  	mutex_unlock(&reporter->devlink->lock);
>  	if (reporter->dump_fmsg)
>  		devlink_fmsg_free(reporter->dump_fmsg);
> +	cancel_work_sync(&reporter->recover_work);
>  	kfree(reporter);
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
> @@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
>  
> -static int
> -devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> -				void *priv_ctx)
> -{
> -	int err;
> -
> -	if (!reporter->ops->recover)
> -		return -EOPNOTSUPP;
> -
> -	err = reporter->ops->recover(reporter, priv_ctx);
> -	if (err)
> -		return err;
> -
> -	reporter->recovery_count++;
> -	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> -	reporter->last_recovery_ts = jiffies;
> -
> -	return 0;
> -}
> -
>  static void
>  devlink_health_dump_clear(struct devlink_health_reporter *reporter)
>  {
> @@ -4813,7 +4831,11 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>  	if (!reporter)
>  		return -EINVAL;
>  
> -	return devlink_health_reporter_recover(reporter, NULL);
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> +	return 0;
>  }

So the recover user space request will no longer return the status, and
it will not actually wait for the recover to happen.  Leaving user
pondering - did the recover run and fail, or did it not get run yet...

>  static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
> @@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
>  	INIT_LIST_HEAD(&devlink->param_list);
>  	INIT_LIST_HEAD(&devlink->region_list);
>  	INIT_LIST_HEAD(&devlink->reporter_list);
> +	devlink->reporters_wq = create_singlethread_workqueue("devlink_reporters");

Why is it single threaded?

> +	if (!devlink->reporters_wq) {
> +		kfree(devlink);
> +		return NULL;
> +	}
>  	mutex_init(&devlink->lock);
>  	return devlink;
>  }
> @@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink *devlink)
>  void devlink_free(struct devlink *devlink)
>  {
>  	mutex_destroy(&devlink->lock);
> +	destroy_workqueue(devlink->reporters_wq);
>  	WARN_ON(!list_empty(&devlink->reporter_list));
>  	WARN_ON(!list_empty(&devlink->region_list));
>  	WARN_ON(!list_empty(&devlink->param_list));


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-25 21:38 ` Jakub Kicinski
@ 2019-04-26  1:42   ` Saeed Mahameed
  2019-04-26  2:37     ` Jakub Kicinski
  2019-04-26  4:38   ` Jiri Pirko
  1 sibling, 1 reply; 9+ messages in thread
From: Saeed Mahameed @ 2019-04-26  1:42 UTC (permalink / raw)
  To: jakub.kicinski, Moshe Shemesh; +Cc: davem, Jiri Pirko, netdev, linux-kernel

On Thu, 2019-04-25 at 14:38 -0700, Jakub Kicinski wrote:
> On Thu, 25 Apr 2019 13:57:03 +0300, Moshe Shemesh wrote:
> > Different reporters have different rules in the driver and are
> > being
> > created/destroyed during different stages of driver
> > load/unload/running.
> > So during execution of a reporter recover the flow can go through
> > another reporter's destroy and create. Such flow leads to deadlock
> > trying to lock a mutex already held if the flow was triggered by
> > devlink
> > recover command.
> > To avoid such deadlock, we execute the recover flow from a
> > workqueue.
> > Once the recover work is done successfully the reporter health
> > state and
> > recover counter are being updated.
> 
> Naive question, why not just run the doit unlocked?  Why the async?
> 
> > Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
> > Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> 
> One day we really gotta start documenting the context from which 
> things are called and locks called when ops are invoked.. :)
> 
> > diff --git a/net/core/devlink.c b/net/core/devlink.c
> > index 7b91605..8ee380e 100644
> > --- a/net/core/devlink.c
> > +++ b/net/core/devlink.c
> > @@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
> >  	return NULL;
> >  }
> >  
> > +static int
> > +devlink_health_reporter_recover(struct devlink_health_reporter
> > *reporter,
> > +				void *priv_ctx)
> > +{
> > +	int err;
> > +
> > +	if (!reporter->ops->recover)
> > +		return -EOPNOTSUPP;
> > +
> > +	err = reporter->ops->recover(reporter, priv_ctx);
> > +	if (err)
> > +		return err;
> > +
> > +	reporter->recovery_count++;
> > +	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> > +	reporter->last_recovery_ts = jiffies;
> 
> Well, the dump looks at these without taking any locks..
> 
> > +	trace_devlink_health_reporter_state_update(reporter->devlink,
> > +						   reporter->ops->name,
> > +						   reporter-
> > >health_state);
> > +	return 0;
> > +}
> > +
> > +static void
> > +devlink_health_reporter_recover_work(struct work_struct *work)
> > +{
> > +	struct devlink_health_reporter *reporter;
> > +
> > +	reporter = container_of(work, struct devlink_health_reporter,
> > +				recover_work);
> > +
> > +	devlink_health_reporter_recover(reporter, NULL);
> > +}
> > +
> >  /**
> >   *	devlink_health_reporter_create - create devlink health reporter
> >   *
> > @@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
> >  	reporter->devlink = devlink;
> >  	reporter->graceful_period = graceful_period;
> >  	reporter->auto_recover = auto_recover;
> > +	INIT_WORK(&reporter->recover_work,
> > +		  devlink_health_reporter_recover_work);
> >  	mutex_init(&reporter->dump_lock);
> >  	list_add_tail(&reporter->list, &devlink->reporter_list);
> >  unlock:
> > @@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
> >  	mutex_unlock(&reporter->devlink->lock);
> >  	if (reporter->dump_fmsg)
> >  		devlink_fmsg_free(reporter->dump_fmsg);
> > +	cancel_work_sync(&reporter->recover_work);
> >  	kfree(reporter);
> >  }
> >  EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
> > @@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
> >  }
> >  EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
> >  
> > -static int
> > -devlink_health_reporter_recover(struct devlink_health_reporter
> > *reporter,
> > -				void *priv_ctx)
> > -{
> > -	int err;
> > -
> > -	if (!reporter->ops->recover)
> > -		return -EOPNOTSUPP;
> > -
> > -	err = reporter->ops->recover(reporter, priv_ctx);
> > -	if (err)
> > -		return err;
> > -
> > -	reporter->recovery_count++;
> > -	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> > -	reporter->last_recovery_ts = jiffies;
> > -
> > -	return 0;
> > -}
> > -
> >  static void
> >  devlink_health_dump_clear(struct devlink_health_reporter
> > *reporter)
> >  {
> > @@ -4813,7 +4831,11 @@ static int
> > devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
> >  	if (!reporter)
> >  		return -EINVAL;
> >  
> > -	return devlink_health_reporter_recover(reporter, NULL);
> > +	if (!reporter->ops->recover)
> > +		return -EOPNOTSUPP;
> > +
> > +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> > +	return 0;
> >  }
> 
> So the recover user space request will no longer return the status,
> and
> it will not actually wait for the recover to happen.  Leaving user
> pondering - did the recover run and fail, or did it nor get run
> yet...
> 

wait_for_completion_interruptible_timeout is missing from the design ?

> >  static int devlink_nl_cmd_health_reporter_diagnose_doit(struct
> > sk_buff *skb,
> > @@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct
> > devlink_ops *ops, size_t priv_size)
> >  	INIT_LIST_HEAD(&devlink->param_list);
> >  	INIT_LIST_HEAD(&devlink->region_list);
> >  	INIT_LIST_HEAD(&devlink->reporter_list);
> > +	devlink->reporters_wq =
> > create_singlethread_workqueue("devlink_reporters");
> 
> Why is it single threaded?
> 
> > +	if (!devlink->reporters_wq) {
> > +		kfree(devlink);
> > +		return NULL;
> > +	}
> >  	mutex_init(&devlink->lock);
> >  	return devlink;
> >  }
> > @@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink
> > *devlink)
> >  void devlink_free(struct devlink *devlink)
> >  {
> >  	mutex_destroy(&devlink->lock);
> > +	destroy_workqueue(devlink->reporters_wq);
> >  	WARN_ON(!list_empty(&devlink->reporter_list));
> >  	WARN_ON(!list_empty(&devlink->region_list));
> >  	WARN_ON(!list_empty(&devlink->param_list));

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-26  1:42   ` Saeed Mahameed
@ 2019-04-26  2:37     ` Jakub Kicinski
  2019-04-26 13:04       ` Moshe Shemesh
  0 siblings, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2019-04-26  2:37 UTC (permalink / raw)
  To: Saeed Mahameed; +Cc: Moshe Shemesh, davem, Jiri Pirko, netdev, linux-kernel

On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
> > > @@ -4813,7 +4831,11 @@ static int
> > > devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
> > >  	if (!reporter)
> > >  		return -EINVAL;
> > >  
> > > -	return devlink_health_reporter_recover(reporter, NULL);
> > > +	if (!reporter->ops->recover)
> > > +		return -EOPNOTSUPP;
> > > +
> > > +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> > > +	return 0;
> > >  }  
> > 
> > So the recover user space request will no longer return the status,
> > and
> > it will not actually wait for the recover to happen.  Leaving user
> pondering - did the recover run and fail, or did it not get run
> > yet...
> >   
> 
> wait_for_completion_interruptible_timeout is missing from the design ?

Perhaps, but I think its better to avoid the async execution of 
the recover all together.  Perhaps its better to refcount the 
reporters on the call to recover_doit?  Or some such.. :)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-25 21:38 ` Jakub Kicinski
  2019-04-26  1:42   ` Saeed Mahameed
@ 2019-04-26  4:38   ` Jiri Pirko
  1 sibling, 0 replies; 9+ messages in thread
From: Jiri Pirko @ 2019-04-26  4:38 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Moshe Shemesh, David S. Miller, Jiri Pirko, netdev, linux-kernel

Thu, Apr 25, 2019 at 11:38:47PM CEST, jakub.kicinski@netronome.com wrote:
>On Thu, 25 Apr 2019 13:57:03 +0300, Moshe Shemesh wrote:
>> Different reporters have different rules in the driver and are being
>> created/destroyed during different stages of driver load/unload/running.
>> So during execution of a reporter recover the flow can go through
>> another reporter's destroy and create. Such flow leads to deadlock
>> trying to lock a mutex already held if the flow was triggered by devlink
>> recover command.
>> To avoid such deadlock, we execute the recover flow from a workqueue.
>> Once the recover work is done successfully the reporter health state and
>> recover counter are being updated.
>
>Naive question, why not just run the doit unlocked?  Why the async?

Hmm, you have a point here. That would be probably doable.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-26  2:37     ` Jakub Kicinski
@ 2019-04-26 13:04       ` Moshe Shemesh
  2019-04-26 16:19         ` Jakub Kicinski
  0 siblings, 1 reply; 9+ messages in thread
From: Moshe Shemesh @ 2019-04-26 13:04 UTC (permalink / raw)
  To: Jakub Kicinski, Saeed Mahameed; +Cc: davem, Jiri Pirko, netdev, linux-kernel



On 4/26/2019 5:37 AM, Jakub Kicinski wrote:
> On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
>>>> @@ -4813,7 +4831,11 @@ static int
>>>> devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>>>>   	if (!reporter)
>>>>   		return -EINVAL;
>>>>   
>>>> -	return devlink_health_reporter_recover(reporter, NULL);
>>>> +	if (!reporter->ops->recover)
>>>> +		return -EOPNOTSUPP;
>>>> +
>>>> +	queue_work(devlink->reporters_wq, &reporter->recover_work);
>>>> +	return 0;
>>>>   }
>>>
>>> So the recover user space request will no longer return the status,
>>> and
>>> it will not actually wait for the recover to happen.  Leaving user
> > pondering - did the recover run and fail, or did it not get run
>>> yet...
>>>    
>>
>> wait_for_completion_interruptible_timeout is missing from the design ?
> 
> Perhaps, but I think its better to avoid the async execution of
> the recover all together.  Perhaps its better to refcount the
> reporters on the call to recover_doit?  Or some such.. :)
> 

I tried using refcount instead of devlink lock here. But once I get to 
reporter destroy I wait for the refcount and not sure if I should 
release the reporter after some timeout or have endless wait for 
refcount. Both options seem not good.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-26 13:04       ` Moshe Shemesh
@ 2019-04-26 16:19         ` Jakub Kicinski
  2019-04-27 17:15           ` Moshe Shemesh
  0 siblings, 1 reply; 9+ messages in thread
From: Jakub Kicinski @ 2019-04-26 16:19 UTC (permalink / raw)
  To: Moshe Shemesh; +Cc: Saeed Mahameed, davem, Jiri Pirko, netdev, linux-kernel

On Fri, Apr 26, 2019 at 6:04 AM Moshe Shemesh <moshe@mellanox.com> wrote:
> On 4/26/2019 5:37 AM, Jakub Kicinski wrote:
> > On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
> >>>> @@ -4813,7 +4831,11 @@ static int
> >>>> devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
> >>>>    if (!reporter)
> >>>>            return -EINVAL;
> >>>>
> >>>> -  return devlink_health_reporter_recover(reporter, NULL);
> >>>> +  if (!reporter->ops->recover)
> >>>> +          return -EOPNOTSUPP;
> >>>> +
> >>>> +  queue_work(devlink->reporters_wq, &reporter->recover_work);
> >>>> +  return 0;
> >>>>   }
> >>>
> >>> So the recover user space request will no longer return the status,
> >>> and
> >>> it will not actually wait for the recover to happen.  Leaving user
> >>> pondering - did the recover run and fail, or did it not get run
> >>> yet...
> >>>
> >>
> >> wait_for_completion_interruptible_timeout is missing from the design ?
> >
> > Perhaps, but I think its better to avoid the async execution of
> > the recover all together.  Perhaps its better to refcount the
> > reporters on the call to recover_doit?  Or some such.. :)
> >
>
> I tried using refcount instead of devlink lock here. But once I get to
> reporter destroy I wait for the refcount and not sure if I should
> release the reporter after some timeout or have endless wait for
> refcount. Both options seem not good.

Well you should "endlessly" wait.  Why would the refcount not drop,
you have to remove it from the list first, so no new operations can
start, right?
In principle there is no difference between waiting for refcount to
drop, flushing the work, or waiting for the devlink lock if reporter
holds it?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next] devlink: Execute devlink health recover as a work
  2019-04-26 16:19         ` Jakub Kicinski
@ 2019-04-27 17:15           ` Moshe Shemesh
  0 siblings, 0 replies; 9+ messages in thread
From: Moshe Shemesh @ 2019-04-27 17:15 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Saeed Mahameed, davem, Jiri Pirko, netdev, linux-kernel



On 4/26/2019 7:19 PM, Jakub Kicinski wrote:
> On Fri, Apr 26, 2019 at 6:04 AM Moshe Shemesh <moshe@mellanox.com> wrote:
>> On 4/26/2019 5:37 AM, Jakub Kicinski wrote:
>>> On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
>>>>>> @@ -4813,7 +4831,11 @@ static int
>>>>>> devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>>>>>>     if (!reporter)
>>>>>>             return -EINVAL;
>>>>>>
>>>>>> -  return devlink_health_reporter_recover(reporter, NULL);
>>>>>> +  if (!reporter->ops->recover)
>>>>>> +          return -EOPNOTSUPP;
>>>>>> +
>>>>>> +  queue_work(devlink->reporters_wq, &reporter->recover_work);
>>>>>> +  return 0;
>>>>>>    }
>>>>>
>>>>> So the recover user space request will no longer return the status,
>>>>> and
>>>>> it will not actually wait for the recover to happen.  Leaving user
>>>>> pondering - did the recover run and fail, or did it not get run
>>>>> yet...
>>>>>
>>>>
>>>> wait_for_completion_interruptible_timeout is missing from the design ?
>>>
>>> Perhaps, but I think its better to avoid the async execution of
>>> the recover all together.  Perhaps its better to refcount the
>>> reporters on the call to recover_doit?  Or some such.. :)
>>>
>>
>> I tried using refcount instead of devlink lock here. But once I get to
>> reporter destroy I wait for the refcount and not sure if I should
>> release the reporter after some timeout or have endless wait for
>> refcount. Both options seem not good.
> 
> Well you should "endlessly" wait.  Why would the refcount not drop,
> you have to remove it from the list first, so no new operations can
> start, right?
> In principle there is no difference between waiting for refcount to
> drop, flushing the work, or waiting for the devlink lock if reporter
> holds it?
> 
Makes sense, I will rewrite this patch, thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-04-27 17:15 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-25 10:57 [PATCH net-next] devlink: Execute devlink health recover as a work Moshe Shemesh
2019-04-25 18:11 ` Saeed Mahameed
2019-04-25 21:38 ` Jakub Kicinski
2019-04-26  1:42   ` Saeed Mahameed
2019-04-26  2:37     ` Jakub Kicinski
2019-04-26 13:04       ` Moshe Shemesh
2019-04-26 16:19         ` Jakub Kicinski
2019-04-27 17:15           ` Moshe Shemesh
2019-04-26  4:38   ` Jiri Pirko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).