Netdev Archive on lore.kernel.org
 help / color / Atom feed
From: Jason Gunthorpe <jgg@mellanox.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: Doug Ledford <dledford@redhat.com>,
	Leon Romanovsky <leonro@mellanox.com>,
	RDMA mailing list <linux-rdma@vger.kernel.org>,
	Yishai Hadas <yishaih@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	linux-netdev <netdev@vger.kernel.org>
Subject: Re: [PATCH rdma-next v1 10/12] IB/mlx5: Enable subscription for device events over DEVX
Date: Mon, 24 Jun 2019 11:57:30 +0000
Message-ID: <20190624115726.GC5479@mellanox.com> (raw)
In-Reply-To: <20190618171540.11729-11-leon@kernel.org>

On Tue, Jun 18, 2019 at 08:15:38PM +0300, Leon Romanovsky wrote:
> From: Yishai Hadas <yishaih@mellanox.com>
> 
> Enable subscription for device events over DEVX.
> 
> Each subscription is added to the two level XA data structure according
> to its event number and the DEVX object information in case was given
> with the given target fd.
> 
> Those events will be reported over the given fd once will occur.
> Downstream patches will mange the dispatching to any subscription.
> 
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>  drivers/infiniband/hw/mlx5/devx.c        | 564 ++++++++++++++++++++++-
>  include/uapi/rdma/mlx5_user_ioctl_cmds.h |   9 +
>  2 files changed, 566 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
> index e9b9ba5a3e9a..304b13e7a265 100644
> +++ b/drivers/infiniband/hw/mlx5/devx.c
> @@ -14,6 +14,7 @@
>  #include <linux/mlx5/driver.h>
>  #include <linux/mlx5/fs.h>
>  #include "mlx5_ib.h"
> +#include <linux/xarray.h>
>  
>  #define UVERBS_MODULE_NAME mlx5_ib
>  #include <rdma/uverbs_named_ioctl.h>
> @@ -33,6 +34,37 @@ struct devx_async_data {
>  	struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
>  };
>  
> +/* first level XA value data structure */
> +struct devx_event {
> +	struct xarray object_ids; /* second XA level, Key = object id */
> +	struct list_head unaffiliated_list;
> +};
> +
> +/* second level XA value data structure */
> +struct devx_obj_event {
> +	struct rcu_head rcu;
> +	struct list_head obj_sub_list;
> +};
> +
> +struct devx_event_subscription {
> +	struct list_head file_list; /* headed in private_data->
> +				     * subscribed_events_list
> +				     */
> +	struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
> +				   * devx_obj_event->obj_sub_list
> +				   */
> +	struct list_head obj_list; /* headed in devx_object */
> +
> +	u32 xa_key_level1;
> +	u32 xa_key_level2;
> +	struct rcu_head	rcu;
> +	u64 cookie;
> +	bool is_obj_related;
> +	struct ib_uobject *fd_uobj;
> +	void *object;	/* May need direct access upon hot unplug */

This should be a 'struct file *' and have a better name.

And I'm unclear why we need to store both the ib_uobject and the
struct file for the same thing? And why are we storing the uobj here
instead of the struct devx_async_event_file *?

Since uobj->object == flip && filp->private_data == uobj, I have a
hard time to understand why we need both things, it seems to me that
if we get the fget on the filp then we can rely on the
filp->private_data to get back to the devx_async_event_file.

> +	struct eventfd_ctx *eventfd;
> +};
> +

>  /*
>   * As the obj_id in the firmware is not globally unique the object type
>   * must be considered upon checking for a valid object id.
> @@ -1143,14 +1275,53 @@ static void devx_cleanup_mkey(struct devx_obj *obj)
>  	write_unlock_irqrestore(&table->lock, flags);
>  }
>  
> +static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
> +				      struct devx_event_subscription *sub)
> +{
> +	list_del_rcu(&sub->file_list);
> +	list_del_rcu(&sub->xa_list);
> +
> +	if (sub->is_obj_related) {

is_obj_related looks like it is just list_empty(obj_list)??

Success oriented flow

> @@ -1523,6 +1700,350 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
>  	return err;
>  }
>  
> +static void
> +subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
> +			   u32 key_level1,
> +			   u32 key_level2,
> +			   struct devx_obj_event *alloc_obj_event)
> +{
> +	struct devx_event *event;
> +
> +	/* Level 1 is valid for future use - no need to free */
> +	if (!alloc_obj_event)
> +		return;
> +
> +	event = xa_load(&devx_event_table->event_xa, key_level1);
> +	WARN_ON(!event);
> +
> +	xa_erase(&event->object_ids, key_level2);

Shoulnd't this only erase if the value stored is NULL?

> +	kfree(alloc_obj_event);
> +}
> +
> +static int
> +subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
> +			 u32 key_level1,
> +			 bool is_level2,
> +			 u32 key_level2,
> +			 struct devx_obj_event **alloc_obj_event)
> +{
> +	struct devx_obj_event *obj_event;
> +	struct devx_event *event;
> +	bool new_entry_level1 = false;
> +	int err;
> +
> +	event = xa_load(&devx_event_table->event_xa, key_level1);
> +	if (!event) {
> +		event = kzalloc(sizeof(*event), GFP_KERNEL);
> +		if (!event)
> +			return -ENOMEM;
> +
> +		new_entry_level1 = true;
> +		INIT_LIST_HEAD(&event->unaffiliated_list);
> +		xa_init(&event->object_ids);
> +
> +		err = xa_insert(&devx_event_table->event_xa,
> +				key_level1,
> +				event,
> +				GFP_KERNEL);
> +		if (err)
> +			goto end;
> +	}
> +
> +	if (!is_level2)
> +		return 0;
> +
> +	obj_event = xa_load(&event->object_ids, key_level2);
> +	if (!obj_event) {
> +		err = xa_reserve(&event->object_ids, key_level2, GFP_KERNEL);
> +		if (err)
> +			goto err_level1;
> +
> +		obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
> +		if (!obj_event) {
> +			err = -ENOMEM;
> +			goto err_level2;
> +		}
> +
> +		INIT_LIST_HEAD(&obj_event->obj_sub_list);
> +		*alloc_obj_event = obj_event;

This is goofy, just store the empty obj_event in the xa instead of
using xa_reserve, and when you go to do the error unwind just delete
any level2' devx_obj_event' that has a list_empty(obj_sub_list), get
rid of the wonky alloc_obj_event stuff.

The best configuration would be to use devx_cleanup_subscription to
undo the partially ready subscription.

> +	}
> +
> +	return 0;
> +
> +err_level2:
> +	xa_erase(&event->object_ids, key_level2);
> +
> +err_level1:
> +	if (new_entry_level1)
> +		xa_erase(&devx_event_table->event_xa, key_level1);
> +end:
> +	if (new_entry_level1)
> +		kfree(event);

Can't do this, once the level1 is put in the tree it could be referenced by
the irqs. At least it needs a kfree_rcu, most likely it is simpler to
just leave it.

> +#define MAX_NUM_EVENTS 16
> +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
> +	struct uverbs_attr_bundle *attrs)
> +{
> +	struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
> +				attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
> +	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
> +		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
> +	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
> +	struct ib_uobject *fd_uobj;
> +	struct devx_obj *obj = NULL;
> +	struct devx_async_event_file *ev_file;
> +	struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
> +	u16 *event_type_num_list;
> +	struct devx_event_subscription **event_sub_arr;
> +	struct devx_obj_event  **event_obj_array_alloc;
> +	int redirect_fd;
> +	bool use_eventfd = false;
> +	int num_events;
> +	int num_alloc_xa_entries = 0;
> +	u16 obj_type = 0;
> +	u64 cookie = 0;
> +	u32 obj_id = 0;
> +	int err;
> +	int i;
> +
> +	if (!c->devx_uid)
> +		return -EINVAL;
> +
> +	if (!IS_ERR(devx_uobj)) {
> +		obj = (struct devx_obj *)devx_uobj->object;
> +		if (obj)
> +			obj_id = get_dec_obj_id(obj->obj_id);
> +	}
> +
> +	fd_uobj = uverbs_attr_get_uobject(attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
> +	if (IS_ERR(fd_uobj))
> +		return PTR_ERR(fd_uobj);
> +
> +	ev_file = container_of(fd_uobj, struct devx_async_event_file,
> +			       uobj);
> +
> +	if (uverbs_attr_is_valid(attrs,
> +				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
> +		err = uverbs_copy_from(&redirect_fd, attrs,
> +			       MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
> +		if (err)
> +			return err;
> +
> +		use_eventfd = true;
> +	}
> +
> +	if (uverbs_attr_is_valid(attrs,
> +				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
> +		if (use_eventfd)
> +			return -EINVAL;
> +
> +		err = uverbs_copy_from(&cookie, attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
> +		if (err)
> +			return err;
> +	}
> +
> +	num_events = uverbs_attr_ptr_get_array_size(
> +		attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
> +		sizeof(u16));
> +
> +	if (num_events < 0)
> +		return num_events;
> +
> +	if (num_events > MAX_NUM_EVENTS)
> +		return -EINVAL;
> +
> +	event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
> +			MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
> +
> +	if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
> +		return -EINVAL;
> +
> +	event_sub_arr = uverbs_zalloc(attrs,
> +		MAX_NUM_EVENTS * sizeof(struct devx_event_subscription *));
> +	event_obj_array_alloc = uverbs_zalloc(attrs,
> +		MAX_NUM_EVENTS * sizeof(struct devx_obj_event *));

There are so many list_heads in the devx_event_subscription, why not
use just one of them to store the allocated events instead of this
temp array? ie event_list looks good for this purpose.

> +
> +	if (!event_sub_arr || !event_obj_array_alloc)
> +		return -ENOMEM;
> +
> +	/* Protect from concurrent subscriptions to same XA entries to allow
> +	 * both to succeed
> +	 */
> +	mutex_lock(&devx_event_table->event_xa_lock);
> +	for (i = 0; i < num_events; i++) {
> +		u32 key_level1;
> +
> +		if (obj)
> +			obj_type = get_dec_obj_type(obj,
> +						    event_type_num_list[i]);
> +		key_level1 = event_type_num_list[i] | obj_type << 16;
> +
> +		err = subscribe_event_xa_alloc(devx_event_table,
> +					       key_level1,
> +					       obj ? true : false,
> +					       obj_id,
> +					       &event_obj_array_alloc[i]);

Usless ?:

> +		if (err)
> +			goto err;
> +
> +		num_alloc_xa_entries++;
> +		event_sub_arr[i] = kzalloc(sizeof(*event_sub_arr[i]),
> +					   GFP_KERNEL);
> +		if (!event_sub_arr[i])
> +			goto err;
> +
> +		if (use_eventfd) {
> +			event_sub_arr[i]->eventfd =
> +				eventfd_ctx_fdget(redirect_fd);
> +
> +			if (IS_ERR(event_sub_arr[i]->eventfd)) {
> +				err = PTR_ERR(event_sub_arr[i]->eventfd);
> +				event_sub_arr[i]->eventfd = NULL;
> +				goto err;
> +			}
> +		}
> +
> +		event_sub_arr[i]->cookie = cookie;
> +		event_sub_arr[i]->fd_uobj = fd_uobj;
> +		event_sub_arr[i]->object = fd_uobj->object;
> +		/* May be needed upon cleanup the devx object/subscription */
> +		event_sub_arr[i]->xa_key_level1 = key_level1;
> +		event_sub_arr[i]->xa_key_level2 = obj_id;
> +		event_sub_arr[i]->is_obj_related = obj ? true : false;

Unneeded ?:


Jason

  reply index

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-18 17:15 [PATCH rdma-next v1 00/12] DEVX asynchronous events Leon Romanovsky
2019-06-18 17:15 ` [PATCH mlx5-next v1 01/12] net/mlx5: Fix mlx5_core_destroy_cq() error flow Leon Romanovsky
2019-06-18 17:15 ` [PATCH mlx5-next v1 02/12] net/mlx5: Use event mask based on device capabilities Leon Romanovsky
2019-06-27  0:23   ` Saeed Mahameed
2019-06-18 17:15 ` [PATCH mlx5-next v1 03/12] net/mlx5: Expose the API to register for ANY event Leon Romanovsky
2019-06-18 17:15 ` [PATCH mlx5-next v1 04/12] net/mlx5: mlx5_core_create_cq() enhancements Leon Romanovsky
2019-06-18 17:15 ` [PATCH mlx5-next v1 05/12] net/mlx5: Report a CQ error event only when a handler was set Leon Romanovsky
2019-06-18 17:15 ` [PATCH mlx5-next v1 06/12] net/mlx5: Report EQE data upon CQ completion Leon Romanovsky
2019-06-18 17:15 ` [PATCH mlx5-next v1 07/12] net/mlx5: Expose device definitions for object events Leon Romanovsky
2019-06-18 17:15 ` [PATCH rdma-next v1 08/12] IB/mlx5: Introduce MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD Leon Romanovsky
2019-06-24 11:51   ` Jason Gunthorpe
2019-06-24 13:25     ` Yishai Hadas
2019-06-24 14:30       ` Jason Gunthorpe
2019-06-18 17:15 ` [PATCH rdma-next v1 09/12] IB/mlx5: Register DEVX with mlx5_core to get async events Leon Romanovsky
2019-06-24 11:52   ` Jason Gunthorpe
2019-06-24 13:36     ` Yishai Hadas
2019-06-24 14:30       ` Jason Gunthorpe
2019-06-18 17:15 ` [PATCH rdma-next v1 10/12] IB/mlx5: Enable subscription for device events over DEVX Leon Romanovsky
2019-06-24 11:57   ` Jason Gunthorpe [this message]
2019-06-24 16:13     ` Yishai Hadas
2019-06-24 17:56       ` Jason Gunthorpe
2019-06-18 17:15 ` [PATCH rdma-next v1 11/12] IB/mlx5: Implement DEVX dispatching event Leon Romanovsky
2019-06-24 12:03   ` Jason Gunthorpe
2019-06-24 16:55     ` Yishai Hadas
2019-06-24 18:06       ` Jason Gunthorpe
2019-06-25 14:41         ` Yishai Hadas
2019-06-25 20:23           ` Jason Gunthorpe
2019-06-18 17:15 ` [PATCH rdma-next v1 12/12] IB/mlx5: Add DEVX support for CQ events Leon Romanovsky
2019-06-24 12:04   ` Jason Gunthorpe
2019-06-24 17:03     ` Yishai Hadas
2019-06-24 18:06       ` Jason Gunthorpe
2019-06-18 18:51 ` [PATCH rdma-next v1 00/12] DEVX asynchronous events Saeed Mahameed
2019-06-19  4:45   ` Leon Romanovsky
2019-06-24 21:57     ` Saeed Mahameed
2019-06-30  8:53       ` Leon Romanovsky

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190624115726.GC5479@mellanox.com \
    --to=jgg@mellanox.com \
    --cc=dledford@redhat.com \
    --cc=leon@kernel.org \
    --cc=leonro@mellanox.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@mellanox.com \
    --cc=yishaih@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org netdev@archiver.kernel.org
	public-inbox-index netdev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox