From: Saeed Mahameed <saeedm@mellanox.com>
To: "jakub.kicinski@netronome.com" <jakub.kicinski@netronome.com>,
Moshe Shemesh <moshe@mellanox.com>
Cc: "davem@davemloft.net" <davem@davemloft.net>,
Jiri Pirko <jiri@mellanox.com>,
"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH net-next] devlink: Execute devlink health recover as a work
Date: Fri, 26 Apr 2019 01:42:34 +0000 [thread overview]
Message-ID: <e540b1c192632dfcf4c0367be133c9ca51f33b43.camel@mellanox.com> (raw)
In-Reply-To: <20190425143847.417033ab@cakuba.netronome.com>
On Thu, 2019-04-25 at 14:38 -0700, Jakub Kicinski wrote:
> On Thu, 25 Apr 2019 13:57:03 +0300, Moshe Shemesh wrote:
> > Different reporters have different rules in the driver and are being
> > created/destroyed during different stages of driver
> > load/unload/running.
> > So during execution of a reporter recover the flow can go through
> > another reporter's destroy and create. Such a flow leads to a
> > deadlock trying to lock a mutex already held if the flow was
> > triggered by a devlink recover command.
> > To avoid such a deadlock, we execute the recover flow from a
> > workqueue. Once the recover work is done successfully, the reporter
> > health state and recover counter are updated.
>
> Naive question, why not just run the doit unlocked? Why the async?
>
> > Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
> > Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>
> One day we really gotta start documenting the context from which
> things are called and the locks held when ops are invoked.. :)
>
> > diff --git a/net/core/devlink.c b/net/core/devlink.c
> > index 7b91605..8ee380e 100644
> > --- a/net/core/devlink.c
> > +++ b/net/core/devlink.c
> > @@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
> > return NULL;
> > }
> >
> > +static int
> > +devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> > +				void *priv_ctx)
> > +{
> > + int err;
> > +
> > + if (!reporter->ops->recover)
> > + return -EOPNOTSUPP;
> > +
> > + err = reporter->ops->recover(reporter, priv_ctx);
> > + if (err)
> > + return err;
> > +
> > + reporter->recovery_count++;
> > + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> > + reporter->last_recovery_ts = jiffies;
>
> Well, the dump looks at these without taking any locks..
>
> > +	trace_devlink_health_reporter_state_update(reporter->devlink,
> > +						   reporter->ops->name,
> > +						   reporter->health_state);
> > + return 0;
> > +}
> > +
> > +static void
> > +devlink_health_reporter_recover_work(struct work_struct *work)
> > +{
> > + struct devlink_health_reporter *reporter;
> > +
> > + reporter = container_of(work, struct devlink_health_reporter,
> > + recover_work);
> > +
> > + devlink_health_reporter_recover(reporter, NULL);
> > +}
> > +
> > /**
> > * devlink_health_reporter_create - create devlink health reporter
> > *
> > @@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
> > reporter->devlink = devlink;
> > reporter->graceful_period = graceful_period;
> > reporter->auto_recover = auto_recover;
> > + INIT_WORK(&reporter->recover_work,
> > + devlink_health_reporter_recover_work);
> > mutex_init(&reporter->dump_lock);
> > list_add_tail(&reporter->list, &devlink->reporter_list);
> > unlock:
> > @@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
> > mutex_unlock(&reporter->devlink->lock);
> > if (reporter->dump_fmsg)
> > devlink_fmsg_free(reporter->dump_fmsg);
> > + cancel_work_sync(&reporter->recover_work);
> > kfree(reporter);
> > }
> > EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
> > @@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
> > }
> > EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
> >
> > -static int
> > -devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> > -				void *priv_ctx)
> > -{
> > - int err;
> > -
> > - if (!reporter->ops->recover)
> > - return -EOPNOTSUPP;
> > -
> > - err = reporter->ops->recover(reporter, priv_ctx);
> > - if (err)
> > - return err;
> > -
> > - reporter->recovery_count++;
> > - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> > - reporter->last_recovery_ts = jiffies;
> > -
> > - return 0;
> > -}
> > -
> > static void
> > devlink_health_dump_clear(struct devlink_health_reporter *reporter)
> > {
> > @@ -4813,7 +4831,11 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
> > if (!reporter)
> > return -EINVAL;
> >
> > - return devlink_health_reporter_recover(reporter, NULL);
> > + if (!reporter->ops->recover)
> > + return -EOPNOTSUPP;
> > +
> > + queue_work(devlink->reporters_wq, &reporter->recover_work);
> > + return 0;
> > }
>
> So the recover user space request will no longer return the status,
> and it will not actually wait for the recover to happen. Leaving the
> user pondering - did the recover run and fail, or did it not get run
> yet...
>
wait_for_completion_interruptible_timeout is missing from the design?
> > static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
> > @@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
> > INIT_LIST_HEAD(&devlink->param_list);
> > INIT_LIST_HEAD(&devlink->region_list);
> > INIT_LIST_HEAD(&devlink->reporter_list);
> > +	devlink->reporters_wq = create_singlethread_workqueue("devlink_reporters");
>
> Why is it single threaded?
>
> > + if (!devlink->reporters_wq) {
> > + kfree(devlink);
> > + return NULL;
> > + }
> > mutex_init(&devlink->lock);
> > return devlink;
> > }
> > @@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink *devlink)
> > void devlink_free(struct devlink *devlink)
> > {
> > mutex_destroy(&devlink->lock);
> > + destroy_workqueue(devlink->reporters_wq);
> > WARN_ON(!list_empty(&devlink->reporter_list));
> > WARN_ON(!list_empty(&devlink->region_list));
> > WARN_ON(!list_empty(&devlink->param_list));
Thread overview: 9+ messages
2019-04-25 10:57 [PATCH net-next] devlink: Execute devlink health recover as a work Moshe Shemesh
2019-04-25 18:11 ` Saeed Mahameed
2019-04-25 21:38 ` Jakub Kicinski
2019-04-26 1:42 ` Saeed Mahameed [this message]
2019-04-26 2:37 ` Jakub Kicinski
2019-04-26 13:04 ` Moshe Shemesh
2019-04-26 16:19 ` Jakub Kicinski
2019-04-27 17:15 ` Moshe Shemesh
2019-04-26 4:38 ` Jiri Pirko