From: Jason Gunthorpe <jgg@mellanox.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: Doug Ledford <dledford@redhat.com>,
	Leon Romanovsky <leonro@mellanox.com>,
	RDMA mailing list <linux-rdma@vger.kernel.org>,
	Yishai Hadas <yishaih@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	linux-netdev <netdev@vger.kernel.org>
Subject: Re: [PATCH rdma-next v1 10/12] IB/mlx5: Enable subscription for device events over DEVX
Date: Mon, 24 Jun 2019 11:57:30 +0000
Message-ID: <20190624115726.GC5479@mellanox.com>
In-Reply-To: <20190618171540.11729-11-leon@kernel.org>

On Tue, Jun 18, 2019 at 08:15:38PM +0300, Leon Romanovsky wrote:
> From: Yishai Hadas <yishaih@mellanox.com>
> 
> Enable subscription for device events over DEVX.
> 
> Each subscription is added to the two-level XA data structure according
> to its event number and, if one was supplied, the DEVX object
> information, and is associated with the given target fd.
> 
> Those events will be reported over the given fd once they occur.
> Downstream patches will manage the dispatching to any subscription.
> 
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>  drivers/infiniband/hw/mlx5/devx.c        | 564 ++++++++++++++++++++++-
>  include/uapi/rdma/mlx5_user_ioctl_cmds.h |   9 +
>  2 files changed, 566 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
> index e9b9ba5a3e9a..304b13e7a265 100644
> --- a/drivers/infiniband/hw/mlx5/devx.c
> +++ b/drivers/infiniband/hw/mlx5/devx.c
> @@ -14,6 +14,7 @@
>  #include <linux/mlx5/driver.h>
>  #include <linux/mlx5/fs.h>
>  #include "mlx5_ib.h"
> +#include <linux/xarray.h>
>  
>  #define UVERBS_MODULE_NAME mlx5_ib
>  #include <rdma/uverbs_named_ioctl.h>
> @@ -33,6 +34,37 @@ struct devx_async_data {
>  	struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
>  };
>  
> +/* first level XA value data structure */
> +struct devx_event {
> +	struct xarray object_ids; /* second XA level, Key = object id */
> +	struct list_head unaffiliated_list;
> +};
> +
> +/* second level XA value data structure */
> +struct devx_obj_event {
> +	struct rcu_head rcu;
> +	struct list_head obj_sub_list;
> +};
> +
> +struct devx_event_subscription {
> +	struct list_head file_list; /* headed in private_data->
> +				     * subscribed_events_list
> +				     */
> +	struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
> +				   * devx_obj_event->obj_sub_list
> +				   */
> +	struct list_head obj_list; /* headed in devx_object */
> +
> +	u32 xa_key_level1;
> +	u32 xa_key_level2;
> +	struct rcu_head	rcu;
> +	u64 cookie;
> +	bool is_obj_related;
> +	struct ib_uobject *fd_uobj;
> +	void *object;	/* May need direct access upon hot unplug */

This should be a 'struct file *' and have a better name.

And I'm unclear why we need to store both the ib_uobject and the
struct file for the same thing? And why are we storing the uobj here
instead of the struct devx_async_event_file *?

Since uobj->object == filp && filp->private_data == uobj, I have a
hard time understanding why we need both things. It seems to me that
if we do the fget on the filp then we can rely on filp->private_data
to get back to the devx_async_event_file.
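
Roughly (untested), e.g.:

	struct file *filp = fd_uobj->object;   /* uobj->object is the filp */
	struct ib_uobject *uobj = filp->private_data;
	struct devx_async_event_file *ev_file =
		container_of(uobj, struct devx_async_event_file, uobj);

	get_file(filp);   /* the subscription keeps the event file alive */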

> +	struct eventfd_ctx *eventfd;
> +};
> +

>  /*
>   * As the obj_id in the firmware is not globally unique the object type
>   * must be considered upon checking for a valid object id.
> @@ -1143,14 +1275,53 @@ static void devx_cleanup_mkey(struct devx_obj *obj)
>  	write_unlock_irqrestore(&table->lock, flags);
>  }
>  
> +static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
> +				      struct devx_event_subscription *sub)
> +{
> +	list_del_rcu(&sub->file_list);
> +	list_del_rcu(&sub->xa_list);
> +
> +	if (sub->is_obj_related) {

is_obj_related looks like it is just !list_empty(obj_list)?

Also, please use a success-oriented flow here.
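
For is_obj_related, something like this (untested, assuming obj_list is
INIT_LIST_HEAD()ed for unaffiliated subscriptions) would let you drop
the bool:

	static bool devx_sub_is_obj_related(struct devx_event_subscription *sub)
	{
		/* obj_list is only linked into a devx_obj's list for
		 * object-affiliated subscriptions
		 */
		return !list_empty(&sub->obj_list);
	}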

> @@ -1523,6 +1700,350 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
>  	return err;
>  }
>  
> +static void
> +subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
> +			   u32 key_level1,
> +			   u32 key_level2,
> +			   struct devx_obj_event *alloc_obj_event)
> +{
> +	struct devx_event *event;
> +
> +	/* Level 1 is valid for future use - no need to free */
> +	if (!alloc_obj_event)
> +		return;
> +
> +	event = xa_load(&devx_event_table->event_xa, key_level1);
> +	WARN_ON(!event);
> +
> +	xa_erase(&event->object_ids, key_level2);

Shouldn't this only erase if the value stored is still NULL?
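
If I'm reading the XArray API right, xa_release() does exactly that -
it only removes the entry if it still holds the reserved NULL - so
something like (untested):

	/* only drop the slot if nothing was stored into it */
	xa_release(&event->object_ids, key_level2);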

> +	kfree(alloc_obj_event);
> +}
> +
> +static int
> +subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
> +			 u32 key_level1,
> +			 bool is_level2,
> +			 u32 key_level2,
> +			 struct devx_obj_event **alloc_obj_event)
> +{
> +	struct devx_obj_event *obj_event;
> +	struct devx_event *event;
> +	bool new_entry_level1 = false;
> +	int err;
> +
> +	event = xa_load(&devx_event_table->event_xa, key_level1);
> +	if (!event) {
> +		event = kzalloc(sizeof(*event), GFP_KERNEL);
> +		if (!event)
> +			return -ENOMEM;
> +
> +		new_entry_level1 = true;
> +		INIT_LIST_HEAD(&event->unaffiliated_list);
> +		xa_init(&event->object_ids);
> +
> +		err = xa_insert(&devx_event_table->event_xa,
> +				key_level1,
> +				event,
> +				GFP_KERNEL);
> +		if (err)
> +			goto end;
> +	}
> +
> +	if (!is_level2)
> +		return 0;
> +
> +	obj_event = xa_load(&event->object_ids, key_level2);
> +	if (!obj_event) {
> +		err = xa_reserve(&event->object_ids, key_level2, GFP_KERNEL);
> +		if (err)
> +			goto err_level1;
> +
> +		obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
> +		if (!obj_event) {
> +			err = -ENOMEM;
> +			goto err_level2;
> +		}
> +
> +		INIT_LIST_HEAD(&obj_event->obj_sub_list);
> +		*alloc_obj_event = obj_event;

This is goofy; just store the empty obj_event in the xa instead of
using xa_reserve, and when you go to do the error unwind just delete
any level2 'devx_obj_event' that has a list_empty(obj_sub_list). Get
rid of the wonky alloc_obj_event stuff.

The best approach would be to use devx_cleanup_subscription to undo
the partially ready subscription.
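
Roughly (untested):

	obj_event = xa_load(&event->object_ids, key_level2);
	if (!obj_event) {
		obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
		if (!obj_event)
			return -ENOMEM;

		INIT_LIST_HEAD(&obj_event->obj_sub_list);
		err = xa_insert(&event->object_ids, key_level2, obj_event,
				GFP_KERNEL);
		if (err) {
			kfree(obj_event);
			return err;
		}
	}

and the unwind then only has to erase level2 entries whose obj_sub_list
is still list_empty().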

> +	}
> +
> +	return 0;
> +
> +err_level2:
> +	xa_erase(&event->object_ids, key_level2);
> +
> +err_level1:
> +	if (new_entry_level1)
> +		xa_erase(&devx_event_table->event_xa, key_level1);
> +end:
> +	if (new_entry_level1)
> +		kfree(event);

Can't do this - once the level1 is put in the tree it could be
referenced from the IRQ path. At a minimum it needs a kfree_rcu();
most likely it is simpler to just leave it.
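
i.e. (untested) the unwind would just be:

	err_level2:
		/* deliberately leave the level1 devx_event in the tree, it
		 * may already be visible to readers and an empty one is
		 * harmless
		 */
		xa_erase(&event->object_ids, key_level2);
		return err;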

> +#define MAX_NUM_EVENTS 16
> +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
> +	struct uverbs_attr_bundle *attrs)
> +{
> +	struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
> +				attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
> +	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
> +		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
> +	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
> +	struct ib_uobject *fd_uobj;
> +	struct devx_obj *obj = NULL;
> +	struct devx_async_event_file *ev_file;
> +	struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
> +	u16 *event_type_num_list;
> +	struct devx_event_subscription **event_sub_arr;
> +	struct devx_obj_event  **event_obj_array_alloc;
> +	int redirect_fd;
> +	bool use_eventfd = false;
> +	int num_events;
> +	int num_alloc_xa_entries = 0;
> +	u16 obj_type = 0;
> +	u64 cookie = 0;
> +	u32 obj_id = 0;
> +	int err;
> +	int i;
> +
> +	if (!c->devx_uid)
> +		return -EINVAL;
> +
> +	if (!IS_ERR(devx_uobj)) {
> +		obj = (struct devx_obj *)devx_uobj->object;
> +		if (obj)
> +			obj_id = get_dec_obj_id(obj->obj_id);
> +	}
> +
> +	fd_uobj = uverbs_attr_get_uobject(attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
> +	if (IS_ERR(fd_uobj))
> +		return PTR_ERR(fd_uobj);
> +
> +	ev_file = container_of(fd_uobj, struct devx_async_event_file,
> +			       uobj);
> +
> +	if (uverbs_attr_is_valid(attrs,
> +				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
> +		err = uverbs_copy_from(&redirect_fd, attrs,
> +			       MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
> +		if (err)
> +			return err;
> +
> +		use_eventfd = true;
> +	}
> +
> +	if (uverbs_attr_is_valid(attrs,
> +				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
> +		if (use_eventfd)
> +			return -EINVAL;
> +
> +		err = uverbs_copy_from(&cookie, attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
> +		if (err)
> +			return err;
> +	}
> +
> +	num_events = uverbs_attr_ptr_get_array_size(
> +		attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
> +		sizeof(u16));
> +
> +	if (num_events < 0)
> +		return num_events;
> +
> +	if (num_events > MAX_NUM_EVENTS)
> +		return -EINVAL;
> +
> +	event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
> +			MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
> +
> +	if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
> +		return -EINVAL;
> +
> +	event_sub_arr = uverbs_zalloc(attrs,
> +		MAX_NUM_EVENTS * sizeof(struct devx_event_subscription *));
> +	event_obj_array_alloc = uverbs_zalloc(attrs,
> +		MAX_NUM_EVENTS * sizeof(struct devx_obj_event *));

There are so many list_heads in devx_event_subscription - why not use
just one of them to store the allocated subscriptions instead of this
temp array? i.e. event_list looks good for this purpose.
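
Roughly what I mean (untested; assumes event_list is the spare
list_head in devx_event_subscription):

	static void devx_free_subs(struct mlx5_ib_dev *dev,
				   struct list_head *sub_list)
	{
		struct devx_event_subscription *sub, *tmp;

		list_for_each_entry_safe(sub, tmp, sub_list, event_list) {
			devx_cleanup_subscription(dev, sub);
			list_del(&sub->event_list);
			kfree(sub);
		}
	}

with the handler doing list_add_tail(&event_sub->event_list, &sub_list)
as each subscription is created and calling this from the err: path.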

> +
> +	if (!event_sub_arr || !event_obj_array_alloc)
> +		return -ENOMEM;
> +
> +	/* Protect from concurrent subscriptions to same XA entries to allow
> +	 * both to succeed
> +	 */
> +	mutex_lock(&devx_event_table->event_xa_lock);
> +	for (i = 0; i < num_events; i++) {
> +		u32 key_level1;
> +
> +		if (obj)
> +			obj_type = get_dec_obj_type(obj,
> +						    event_type_num_list[i]);
> +		key_level1 = event_type_num_list[i] | obj_type << 16;
> +
> +		err = subscribe_event_xa_alloc(devx_event_table,
> +					       key_level1,
> +					       obj ? true : false,
> +					       obj_id,
> +					       &event_obj_array_alloc[i]);

Useless ?:

> +		if (err)
> +			goto err;
> +
> +		num_alloc_xa_entries++;
> +		event_sub_arr[i] = kzalloc(sizeof(*event_sub_arr[i]),
> +					   GFP_KERNEL);
> +		if (!event_sub_arr[i])
> +			goto err;
> +
> +		if (use_eventfd) {
> +			event_sub_arr[i]->eventfd =
> +				eventfd_ctx_fdget(redirect_fd);
> +
> +			if (IS_ERR(event_sub_arr[i]->eventfd)) {
> +				err = PTR_ERR(event_sub_arr[i]->eventfd);
> +				event_sub_arr[i]->eventfd = NULL;
> +				goto err;
> +			}
> +		}
> +
> +		event_sub_arr[i]->cookie = cookie;
> +		event_sub_arr[i]->fd_uobj = fd_uobj;
> +		event_sub_arr[i]->object = fd_uobj->object;
> +		/* May be needed upon cleanup the devx object/subscription */
> +		event_sub_arr[i]->xa_key_level1 = key_level1;
> +		event_sub_arr[i]->xa_key_level2 = obj_id;
> +		event_sub_arr[i]->is_obj_related = obj ? true : false;

Unneeded ?:


Jason
