linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init
@ 2020-08-20  3:41 Md Haris Iqbal
  2020-08-20  7:26 ` Leon Romanovsky
  2020-08-27 11:45 ` Jason Gunthorpe
  0 siblings, 2 replies; 6+ messages in thread
From: Md Haris Iqbal @ 2020-08-20  3:41 UTC (permalink / raw)
  To: danil.kipnis, jinpu.wang, linux-rdma, dledford, jgg, leon
  Cc: Md Haris Iqbal, kernel test robot

The rnbd_server module's communication manager (cm) initialization depends
on the registration of the "network namespace subsystem" of the RDMA CM
agent module. As such, when the kernel is configured to load the
rnbd_server and the RDMA cma module during initialization; and if the
rnbd_server module is initialized before RDMA cma module, a null ptr
dereference occurs during the RDMA bind operation.

Call trace below,

[    1.904782] Call Trace:
[    1.904782]  ? xas_load+0xd/0x80
[    1.904782]  xa_load+0x47/0x80
[    1.904782]  cma_ps_find+0x44/0x70
[    1.904782]  rdma_bind_addr+0x782/0x8b0
[    1.904782]  ? get_random_bytes+0x35/0x40
[    1.904782]  rtrs_srv_cm_init+0x50/0x80
[    1.904782]  rtrs_srv_open+0x102/0x180
[    1.904782]  ? rnbd_client_init+0x6e/0x6e
[    1.904782]  rnbd_srv_init_module+0x34/0x84
[    1.904782]  ? rnbd_client_init+0x6e/0x6e
[    1.904782]  do_one_initcall+0x4a/0x200
[    1.904782]  kernel_init_freeable+0x1f1/0x26e
[    1.904782]  ? rest_init+0xb0/0xb0
[    1.904782]  kernel_init+0xe/0x100
[    1.904782]  ret_from_fork+0x22/0x30
[    1.904782] Modules linked in:
[    1.904782] CR2: 0000000000000015
[    1.904782] ---[ end trace c42df88d6c7b0a48 ]---

All this happens cause the cm init is in the call chain of the module init,
which is not a preferred practice.

So remove the call to rdma_create_id() from the module init call chain.
Instead register rtrs-srv as an ib client, which makes sure that the
rdma_create_id() is called only when an ib device is added.

Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Md Haris Iqbal <haris.iqbal@cloud.ionos.com>
---
Change in v3:
	Removed RDMA init error check while rtrs server open
	Removed -1 assignment for ib_dev_count on RDMA init error
Change in v2:
        Use only single variable to track number of IB devices and failure
        Change according to kernel coding style

 drivers/infiniband/ulp/rtrs/rtrs-srv.c | 69 ++++++++++++++++++++++++--
 drivers/infiniband/ulp/rtrs/rtrs-srv.h |  6 +++
 2 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index a219bd1bdbc2..febc1478b96f 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -16,6 +16,7 @@
 #include "rtrs-srv.h"
 #include "rtrs-log.h"
 #include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
 
 MODULE_DESCRIPTION("RDMA Transport Server");
 MODULE_LICENSE("GPL");
@@ -31,6 +32,7 @@ MODULE_LICENSE("GPL");
 static struct rtrs_rdma_dev_pd dev_pd;
 static mempool_t *chunk_pool;
 struct class *rtrs_dev_class;
+static struct rtrs_srv_ib_ctx ib_ctx;
 
 static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE;
 static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH;
@@ -2033,6 +2035,63 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx)
 	kfree(ctx);
 }
 
+static int rtrs_srv_add_one(struct ib_device *device)
+{
+	struct rtrs_srv_ctx *ctx;
+	int ret;
+
+	if (ib_ctx.ib_dev_count)
+		goto out;
+
+	/*
+	 * Since our CM IDs are NOT bound to any ib device we will create them
+	 * only once
+	 */
+	ctx = ib_ctx.srv_ctx;
+	ret = rtrs_srv_rdma_init(ctx, ib_ctx.port);
+	if (ret) {
+		/*
+		 * We errored out here.
+		 * According to the ib code, if we encounter an error here then the
+		 * error code is ignored, and no more calls to our ops are made.
+		 */
+		pr_err("Failed to initialize RDMA connection");
+		return ret;
+	}
+
+out:
+	/*
+	 * Keep a track on the number of ib devices added
+	 */
+	ib_ctx.ib_dev_count++;
+
+	return 0;
+}
+
+static void rtrs_srv_remove_one(struct ib_device *device, void *client_data)
+{
+	struct rtrs_srv_ctx *ctx;
+
+	ib_ctx.ib_dev_count--;
+
+	if (ib_ctx.ib_dev_count)
+		return;
+
+	/*
+	 * Since our CM IDs are NOT bound to any ib device we will remove them
+	 * only once, when the last device is removed
+	 */
+	ctx = ib_ctx.srv_ctx;
+	rdma_destroy_id(ctx->cm_id_ip);
+	rdma_destroy_id(ctx->cm_id_ib);
+}
+
+static struct ib_client rtrs_srv_client = {
+	.name	= "rtrs_server",
+	.add	= rtrs_srv_add_one,
+	.remove	= rtrs_srv_remove_one
+};
+
 /**
  * rtrs_srv_open() - open RTRS server context
  * @ops:		callback functions
@@ -2051,7 +2110,12 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
-	err = rtrs_srv_rdma_init(ctx, port);
+	ib_ctx = (struct rtrs_srv_ib_ctx) {
+		.srv_ctx	= ctx,
+		.port		= port,
+	};
+
+	err = ib_register_client(&rtrs_srv_client);
 	if (err) {
 		free_srv_ctx(ctx);
 		return ERR_PTR(err);
@@ -2090,8 +2154,7 @@ static void close_ctx(struct rtrs_srv_ctx *ctx)
  */
 void rtrs_srv_close(struct rtrs_srv_ctx *ctx)
 {
-	rdma_destroy_id(ctx->cm_id_ip);
-	rdma_destroy_id(ctx->cm_id_ib);
+	ib_unregister_client(&rtrs_srv_client);
 	close_ctx(ctx);
 	free_srv_ctx(ctx);
 }
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
index dc95b0932f0d..e8f7e99a9a6e 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
@@ -118,6 +118,12 @@ struct rtrs_srv_ctx {
 	struct list_head srv_list;
 };
 
+struct rtrs_srv_ib_ctx {
+	struct rtrs_srv_ctx	*srv_ctx;
+	u16			port;
+	int			ib_dev_count;
+};
+
 extern struct class *rtrs_dev_class;
 
 void close_sess(struct rtrs_srv_sess *sess);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init
  2020-08-20  3:41 [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init Md Haris Iqbal
@ 2020-08-20  7:26 ` Leon Romanovsky
  2020-08-20  8:51   ` Jinpu Wang
  2020-08-27 11:45 ` Jason Gunthorpe
  1 sibling, 1 reply; 6+ messages in thread
From: Leon Romanovsky @ 2020-08-20  7:26 UTC (permalink / raw)
  To: Md Haris Iqbal
  Cc: danil.kipnis, jinpu.wang, linux-rdma, dledford, jgg, kernel test robot

On Thu, Aug 20, 2020 at 09:11:52AM +0530, Md Haris Iqbal wrote:
> The rnbd_server module's communication manager (cm) initialization depends
> on the registration of the "network namespace subsystem" of the RDMA CM
> agent module. As such, when the kernel is configured to load the
> rnbd_server and the RDMA cma module during initialization; and if the
> rnbd_server module is initialized before RDMA cma module, a null ptr
> dereference occurs during the RDMA bind operation.
>
> Call trace below,
>
> [    1.904782] Call Trace:
> [    1.904782]  ? xas_load+0xd/0x80
> [    1.904782]  xa_load+0x47/0x80
> [    1.904782]  cma_ps_find+0x44/0x70
> [    1.904782]  rdma_bind_addr+0x782/0x8b0
> [    1.904782]  ? get_random_bytes+0x35/0x40
> [    1.904782]  rtrs_srv_cm_init+0x50/0x80
> [    1.904782]  rtrs_srv_open+0x102/0x180
> [    1.904782]  ? rnbd_client_init+0x6e/0x6e
> [    1.904782]  rnbd_srv_init_module+0x34/0x84
> [    1.904782]  ? rnbd_client_init+0x6e/0x6e
> [    1.904782]  do_one_initcall+0x4a/0x200
> [    1.904782]  kernel_init_freeable+0x1f1/0x26e
> [    1.904782]  ? rest_init+0xb0/0xb0
> [    1.904782]  kernel_init+0xe/0x100
> [    1.904782]  ret_from_fork+0x22/0x30
> [    1.904782] Modules linked in:
> [    1.904782] CR2: 0000000000000015
> [    1.904782] ---[ end trace c42df88d6c7b0a48 ]---
>
> All this happens cause the cm init is in the call chain of the module init,
> which is not a preferred practice.
>
> So remove the call to rdma_create_id() from the module init call chain.
> Instead register rtrs-srv as an ib client, which makes sure that the
> rdma_create_id() is called only when an ib device is added.
>
> Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality")
> Reported-by: kernel test robot <rong.a.chen@intel.com>
> Signed-off-by: Md Haris Iqbal <haris.iqbal@cloud.ionos.com>
> ---
> Change in v3:
> 	Removed RDMA init error check while rtrs server open
> 	Removed -1 assignment for ib_dev_count on RDMA init error
> Change in v2:
>         Use only single variable to track number of IB devices and failure
>         Change according to kernel coding style
>
>  drivers/infiniband/ulp/rtrs/rtrs-srv.c | 69 ++++++++++++++++++++++++--
>  drivers/infiniband/ulp/rtrs/rtrs-srv.h |  6 +++
>  2 files changed, 72 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> index a219bd1bdbc2..febc1478b96f 100644
> --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> @@ -16,6 +16,7 @@
>  #include "rtrs-srv.h"
>  #include "rtrs-log.h"
>  #include <rdma/ib_cm.h>
> +#include <rdma/ib_verbs.h>
>
>  MODULE_DESCRIPTION("RDMA Transport Server");
>  MODULE_LICENSE("GPL");
> @@ -31,6 +32,7 @@ MODULE_LICENSE("GPL");
>  static struct rtrs_rdma_dev_pd dev_pd;
>  static mempool_t *chunk_pool;
>  struct class *rtrs_dev_class;
> +static struct rtrs_srv_ib_ctx ib_ctx;
>
>  static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE;
>  static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH;
> @@ -2033,6 +2035,63 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx)
>  	kfree(ctx);
>  }
>
> +static int rtrs_srv_add_one(struct ib_device *device)
> +{
> +	struct rtrs_srv_ctx *ctx;
> +	int ret;
> +
> +	if (ib_ctx.ib_dev_count)
> +		goto out;
> +
> +	/*
> +	 * Since our CM IDs are NOT bound to any ib device we will create them
> +	 * only once
> +	 */
> +	ctx = ib_ctx.srv_ctx;
> +	ret = rtrs_srv_rdma_init(ctx, ib_ctx.port);
> +	if (ret) {
> +		/*
> +		 * We errored out here.
> +		 * According to the ib code, if we encounter an error here then the
> +		 * error code is ignored, and no more calls to our ops are made.
> +		 */
> +		pr_err("Failed to initialize RDMA connection");
> +		return ret;
> +	}
> +
> +out:
> +	/*
> +	 * Keep a track on the number of ib devices added
> +	 */
> +	ib_ctx.ib_dev_count++;
> +
> +	return 0;
> +}
> +
> +static void rtrs_srv_remove_one(struct ib_device *device, void *client_data)
> +{
> +	struct rtrs_srv_ctx *ctx;
> +
> +	ib_ctx.ib_dev_count--;
> +
> +	if (ib_ctx.ib_dev_count)
> +		return;
> +
> +	/*
> +	 * Since our CM IDs are NOT bound to any ib device we will remove them
> +	 * only once, when the last device is removed
> +	 */
> +	ctx = ib_ctx.srv_ctx;
> +	rdma_destroy_id(ctx->cm_id_ip);
> +	rdma_destroy_id(ctx->cm_id_ib);
> +}
> +
> +static struct ib_client rtrs_srv_client = {
> +	.name	= "rtrs_server",
> +	.add	= rtrs_srv_add_one,
> +	.remove	= rtrs_srv_remove_one
> +};
> +
>  /**
>   * rtrs_srv_open() - open RTRS server context
>   * @ops:		callback functions
> @@ -2051,7 +2110,12 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
>  	if (!ctx)
>  		return ERR_PTR(-ENOMEM);
>
> -	err = rtrs_srv_rdma_init(ctx, port);
> +	ib_ctx = (struct rtrs_srv_ib_ctx) {
> +		.srv_ctx	= ctx,
> +		.port		= port,
> +	};

It can be my personal issue, but I prefer to see this type of assignment in variable declarations only.
In the code, the better style will be to use direct assignment, e.g. "ib.ctx.port = port;"
It will simplify future refactoring.

Thanks

> +
> +	err = ib_register_client(&rtrs_srv_client);
>  	if (err) {
>  		free_srv_ctx(ctx);
>  		return ERR_PTR(err);
> @@ -2090,8 +2154,7 @@ static void close_ctx(struct rtrs_srv_ctx *ctx)
>   */
>  void rtrs_srv_close(struct rtrs_srv_ctx *ctx)
>  {
> -	rdma_destroy_id(ctx->cm_id_ip);
> -	rdma_destroy_id(ctx->cm_id_ib);
> +	ib_unregister_client(&rtrs_srv_client);
>  	close_ctx(ctx);
>  	free_srv_ctx(ctx);
>  }
> diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
> index dc95b0932f0d..e8f7e99a9a6e 100644
> --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
> +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
> @@ -118,6 +118,12 @@ struct rtrs_srv_ctx {
>  	struct list_head srv_list;
>  };
>
> +struct rtrs_srv_ib_ctx {
> +	struct rtrs_srv_ctx	*srv_ctx;
> +	u16			port;
> +	int			ib_dev_count;
> +};
> +
>  extern struct class *rtrs_dev_class;
>
>  void close_sess(struct rtrs_srv_sess *sess);
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init
  2020-08-20  7:26 ` Leon Romanovsky
@ 2020-08-20  8:51   ` Jinpu Wang
  2020-08-20 10:43     ` Leon Romanovsky
  0 siblings, 1 reply; 6+ messages in thread
From: Jinpu Wang @ 2020-08-20  8:51 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Md Haris Iqbal, Danil Kipnis, linux-rdma, Doug Ledford,
	Jason Gunthorpe, kernel test robot

Hi Leon,

On Thu, Aug 20, 2020 at 9:26 AM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Thu, Aug 20, 2020 at 09:11:52AM +0530, Md Haris Iqbal wrote:
> > The rnbd_server module's communication manager (cm) initialization depends
> > on the registration of the "network namespace subsystem" of the RDMA CM
> > agent module. As such, when the kernel is configured to load the
> > rnbd_server and the RDMA cma module during initialization; and if the
> > rnbd_server module is initialized before RDMA cma module, a null ptr
> > dereference occurs during the RDMA bind operation.
> >
> > Call trace below,
> >
> > [    1.904782] Call Trace:
> > [    1.904782]  ? xas_load+0xd/0x80
> > [    1.904782]  xa_load+0x47/0x80
> > [    1.904782]  cma_ps_find+0x44/0x70
> > [    1.904782]  rdma_bind_addr+0x782/0x8b0
> > [    1.904782]  ? get_random_bytes+0x35/0x40
> > [    1.904782]  rtrs_srv_cm_init+0x50/0x80
> > [    1.904782]  rtrs_srv_open+0x102/0x180
> > [    1.904782]  ? rnbd_client_init+0x6e/0x6e
> > [    1.904782]  rnbd_srv_init_module+0x34/0x84
> > [    1.904782]  ? rnbd_client_init+0x6e/0x6e
> > [    1.904782]  do_one_initcall+0x4a/0x200
> > [    1.904782]  kernel_init_freeable+0x1f1/0x26e
> > [    1.904782]  ? rest_init+0xb0/0xb0
> > [    1.904782]  kernel_init+0xe/0x100
> > [    1.904782]  ret_from_fork+0x22/0x30
> > [    1.904782] Modules linked in:
> > [    1.904782] CR2: 0000000000000015
> > [    1.904782] ---[ end trace c42df88d6c7b0a48 ]---
> >
> > All this happens cause the cm init is in the call chain of the module init,
> > which is not a preferred practice.
> >
> > So remove the call to rdma_create_id() from the module init call chain.
> > Instead register rtrs-srv as an ib client, which makes sure that the
> > rdma_create_id() is called only when an ib device is added.
> >
> > Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality")
> > Reported-by: kernel test robot <rong.a.chen@intel.com>
> > Signed-off-by: Md Haris Iqbal <haris.iqbal@cloud.ionos.com>
> > ---
> > Change in v3:
> >       Removed RDMA init error check while rtrs server open
> >       Removed -1 assignment for ib_dev_count on RDMA init error
> > Change in v2:
> >         Use only single variable to track number of IB devices and failure
> >         Change according to kernel coding style
> >
> >  drivers/infiniband/ulp/rtrs/rtrs-srv.c | 69 ++++++++++++++++++++++++--
> >  drivers/infiniband/ulp/rtrs/rtrs-srv.h |  6 +++
> >  2 files changed, 72 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> > index a219bd1bdbc2..febc1478b96f 100644
> > --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> > +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> > @@ -16,6 +16,7 @@
> >  #include "rtrs-srv.h"
> >  #include "rtrs-log.h"
> >  #include <rdma/ib_cm.h>
> > +#include <rdma/ib_verbs.h>
> >
> >  MODULE_DESCRIPTION("RDMA Transport Server");
> >  MODULE_LICENSE("GPL");
> > @@ -31,6 +32,7 @@ MODULE_LICENSE("GPL");
> >  static struct rtrs_rdma_dev_pd dev_pd;
> >  static mempool_t *chunk_pool;
> >  struct class *rtrs_dev_class;
> > +static struct rtrs_srv_ib_ctx ib_ctx;
> >
> >  static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE;
> >  static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH;
> > @@ -2033,6 +2035,63 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx)
> >       kfree(ctx);
> >  }
> >
> > +static int rtrs_srv_add_one(struct ib_device *device)
> > +{
> > +     struct rtrs_srv_ctx *ctx;
> > +     int ret;
> > +
> > +     if (ib_ctx.ib_dev_count)
> > +             goto out;
> > +
> > +     /*
> > +      * Since our CM IDs are NOT bound to any ib device we will create them
> > +      * only once
> > +      */
> > +     ctx = ib_ctx.srv_ctx;
> > +     ret = rtrs_srv_rdma_init(ctx, ib_ctx.port);
> > +     if (ret) {
> > +             /*
> > +              * We errored out here.
> > +              * According to the ib code, if we encounter an error here then the
> > +              * error code is ignored, and no more calls to our ops are made.
> > +              */
> > +             pr_err("Failed to initialize RDMA connection");
> > +             return ret;
> > +     }
> > +
> > +out:
> > +     /*
> > +      * Keep a track on the number of ib devices added
> > +      */
> > +     ib_ctx.ib_dev_count++;
> > +
> > +     return 0;
> > +}
> > +
> > +static void rtrs_srv_remove_one(struct ib_device *device, void *client_data)
> > +{
> > +     struct rtrs_srv_ctx *ctx;
> > +
> > +     ib_ctx.ib_dev_count--;
> > +
> > +     if (ib_ctx.ib_dev_count)
> > +             return;
> > +
> > +     /*
> > +      * Since our CM IDs are NOT bound to any ib device we will remove them
> > +      * only once, when the last device is removed
> > +      */
> > +     ctx = ib_ctx.srv_ctx;
> > +     rdma_destroy_id(ctx->cm_id_ip);
> > +     rdma_destroy_id(ctx->cm_id_ib);
> > +}
> > +
> > +static struct ib_client rtrs_srv_client = {
> > +     .name   = "rtrs_server",
> > +     .add    = rtrs_srv_add_one,
> > +     .remove = rtrs_srv_remove_one
> > +};
> > +
> >  /**
> >   * rtrs_srv_open() - open RTRS server context
> >   * @ops:             callback functions
> > @@ -2051,7 +2110,12 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
> >       if (!ctx)
> >               return ERR_PTR(-ENOMEM);
> >
> > -     err = rtrs_srv_rdma_init(ctx, port);
> > +     ib_ctx = (struct rtrs_srv_ib_ctx) {
> > +             .srv_ctx        = ctx,
> > +             .port           = port,
> > +     };
>
> It can be my personal issue, but I prefer to see this type of assignment in variable declarations only.
> In the code, the better style will be to use direct assignment, e.g. "ib.ctx.port = port;"
> It will simplify future refactoring.
>
> Thanks

Thanks for your suggestion, but we have a few other place using the
same style, eg
https://elixir.bootlin.com/linux/latest/source/drivers/infiniband/ulp/rtrs/rtrs-srv.c#L1545

It's more consistent to keep it as it is, IMO.

Regards!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init
  2020-08-20  8:51   ` Jinpu Wang
@ 2020-08-20 10:43     ` Leon Romanovsky
  0 siblings, 0 replies; 6+ messages in thread
From: Leon Romanovsky @ 2020-08-20 10:43 UTC (permalink / raw)
  To: Jinpu Wang
  Cc: Md Haris Iqbal, Danil Kipnis, linux-rdma, Doug Ledford,
	Jason Gunthorpe, kernel test robot

On Thu, Aug 20, 2020 at 10:51:06AM +0200, Jinpu Wang wrote:
> Hi Leon,
>
> On Thu, Aug 20, 2020 at 9:26 AM Leon Romanovsky <leon@kernel.org> wrote:
> >
> > On Thu, Aug 20, 2020 at 09:11:52AM +0530, Md Haris Iqbal wrote:
> > > The rnbd_server module's communication manager (cm) initialization depends
> > > on the registration of the "network namespace subsystem" of the RDMA CM
> > > agent module. As such, when the kernel is configured to load the
> > > rnbd_server and the RDMA cma module during initialization; and if the
> > > rnbd_server module is initialized before RDMA cma module, a null ptr
> > > dereference occurs during the RDMA bind operation.
> > >
> > > Call trace below,
> > >
> > > [    1.904782] Call Trace:
> > > [    1.904782]  ? xas_load+0xd/0x80
> > > [    1.904782]  xa_load+0x47/0x80
> > > [    1.904782]  cma_ps_find+0x44/0x70
> > > [    1.904782]  rdma_bind_addr+0x782/0x8b0
> > > [    1.904782]  ? get_random_bytes+0x35/0x40
> > > [    1.904782]  rtrs_srv_cm_init+0x50/0x80
> > > [    1.904782]  rtrs_srv_open+0x102/0x180
> > > [    1.904782]  ? rnbd_client_init+0x6e/0x6e
> > > [    1.904782]  rnbd_srv_init_module+0x34/0x84
> > > [    1.904782]  ? rnbd_client_init+0x6e/0x6e
> > > [    1.904782]  do_one_initcall+0x4a/0x200
> > > [    1.904782]  kernel_init_freeable+0x1f1/0x26e
> > > [    1.904782]  ? rest_init+0xb0/0xb0
> > > [    1.904782]  kernel_init+0xe/0x100
> > > [    1.904782]  ret_from_fork+0x22/0x30
> > > [    1.904782] Modules linked in:
> > > [    1.904782] CR2: 0000000000000015
> > > [    1.904782] ---[ end trace c42df88d6c7b0a48 ]---
> > >
> > > All this happens cause the cm init is in the call chain of the module init,
> > > which is not a preferred practice.
> > >
> > > So remove the call to rdma_create_id() from the module init call chain.
> > > Instead register rtrs-srv as an ib client, which makes sure that the
> > > rdma_create_id() is called only when an ib device is added.
> > >
> > > Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality")
> > > Reported-by: kernel test robot <rong.a.chen@intel.com>
> > > Signed-off-by: Md Haris Iqbal <haris.iqbal@cloud.ionos.com>
> > > ---
> > > Change in v3:
> > >       Removed RDMA init error check while rtrs server open
> > >       Removed -1 assignment for ib_dev_count on RDMA init error
> > > Change in v2:
> > >         Use only single variable to track number of IB devices and failure
> > >         Change according to kernel coding style
> > >
> > >  drivers/infiniband/ulp/rtrs/rtrs-srv.c | 69 ++++++++++++++++++++++++--
> > >  drivers/infiniband/ulp/rtrs/rtrs-srv.h |  6 +++
> > >  2 files changed, 72 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> > > index a219bd1bdbc2..febc1478b96f 100644
> > > --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> > > +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
> > > @@ -16,6 +16,7 @@
> > >  #include "rtrs-srv.h"
> > >  #include "rtrs-log.h"
> > >  #include <rdma/ib_cm.h>
> > > +#include <rdma/ib_verbs.h>
> > >
> > >  MODULE_DESCRIPTION("RDMA Transport Server");
> > >  MODULE_LICENSE("GPL");
> > > @@ -31,6 +32,7 @@ MODULE_LICENSE("GPL");
> > >  static struct rtrs_rdma_dev_pd dev_pd;
> > >  static mempool_t *chunk_pool;
> > >  struct class *rtrs_dev_class;
> > > +static struct rtrs_srv_ib_ctx ib_ctx;
> > >
> > >  static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE;
> > >  static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH;
> > > @@ -2033,6 +2035,63 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx)
> > >       kfree(ctx);
> > >  }
> > >
> > > +static int rtrs_srv_add_one(struct ib_device *device)
> > > +{
> > > +     struct rtrs_srv_ctx *ctx;
> > > +     int ret;
> > > +
> > > +     if (ib_ctx.ib_dev_count)
> > > +             goto out;
> > > +
> > > +     /*
> > > +      * Since our CM IDs are NOT bound to any ib device we will create them
> > > +      * only once
> > > +      */
> > > +     ctx = ib_ctx.srv_ctx;
> > > +     ret = rtrs_srv_rdma_init(ctx, ib_ctx.port);
> > > +     if (ret) {
> > > +             /*
> > > +              * We errored out here.
> > > +              * According to the ib code, if we encounter an error here then the
> > > +              * error code is ignored, and no more calls to our ops are made.
> > > +              */
> > > +             pr_err("Failed to initialize RDMA connection");
> > > +             return ret;
> > > +     }
> > > +
> > > +out:
> > > +     /*
> > > +      * Keep a track on the number of ib devices added
> > > +      */
> > > +     ib_ctx.ib_dev_count++;
> > > +
> > > +     return 0;
> > > +}
> > > +
> > > +static void rtrs_srv_remove_one(struct ib_device *device, void *client_data)
> > > +{
> > > +     struct rtrs_srv_ctx *ctx;
> > > +
> > > +     ib_ctx.ib_dev_count--;
> > > +
> > > +     if (ib_ctx.ib_dev_count)
> > > +             return;
> > > +
> > > +     /*
> > > +      * Since our CM IDs are NOT bound to any ib device we will remove them
> > > +      * only once, when the last device is removed
> > > +      */
> > > +     ctx = ib_ctx.srv_ctx;
> > > +     rdma_destroy_id(ctx->cm_id_ip);
> > > +     rdma_destroy_id(ctx->cm_id_ib);
> > > +}
> > > +
> > > +static struct ib_client rtrs_srv_client = {
> > > +     .name   = "rtrs_server",
> > > +     .add    = rtrs_srv_add_one,
> > > +     .remove = rtrs_srv_remove_one
> > > +};
> > > +
> > >  /**
> > >   * rtrs_srv_open() - open RTRS server context
> > >   * @ops:             callback functions
> > > @@ -2051,7 +2110,12 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
> > >       if (!ctx)
> > >               return ERR_PTR(-ENOMEM);
> > >
> > > -     err = rtrs_srv_rdma_init(ctx, port);
> > > +     ib_ctx = (struct rtrs_srv_ib_ctx) {
> > > +             .srv_ctx        = ctx,
> > > +             .port           = port,
> > > +     };
> >
> > It can be my personal issue, but I prefer to see this type of assignment in variable declarations only.
> > In the code, the better style will be to use direct assignment, e.g. "ib.ctx.port = port;"
> > It will simplify future refactoring.
> >
> > Thanks
>
> Thanks for your suggestion, but we have a few other place using the
> same style, eg
> https://elixir.bootlin.com/linux/latest/source/drivers/infiniband/ulp/rtrs/rtrs-srv.c#L1545
>
> It's more consistent to keep it as it is, IMO.

You are submitting new code, better to fix.

Thanks

>
> Regards!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init
  2020-08-20  3:41 [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init Md Haris Iqbal
  2020-08-20  7:26 ` Leon Romanovsky
@ 2020-08-27 11:45 ` Jason Gunthorpe
  2020-08-27 12:01   ` Haris Iqbal
  1 sibling, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2020-08-27 11:45 UTC (permalink / raw)
  To: Md Haris Iqbal
  Cc: danil.kipnis, jinpu.wang, linux-rdma, dledford, leon, kernel test robot

On Thu, Aug 20, 2020 at 09:11:52AM +0530, Md Haris Iqbal wrote:
  
> +static int rtrs_srv_add_one(struct ib_device *device)
> +{
> +	struct rtrs_srv_ctx *ctx;
> +	int ret;
> +
> +	if (ib_ctx.ib_dev_count)
> +		goto out;

Add's can run concurrently with remove and with other adds so this
unlocked variable is racing

Jason

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init
  2020-08-27 11:45 ` Jason Gunthorpe
@ 2020-08-27 12:01   ` Haris Iqbal
  0 siblings, 0 replies; 6+ messages in thread
From: Haris Iqbal @ 2020-08-27 12:01 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Danil Kipnis, Jinpu Wang, linux-rdma, Doug Ledford,
	Leon Romanovsky, kernel test robot

On Thu, Aug 27, 2020 at 5:15 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Thu, Aug 20, 2020 at 09:11:52AM +0530, Md Haris Iqbal wrote:
>
> > +static int rtrs_srv_add_one(struct ib_device *device)
> > +{
> > +     struct rtrs_srv_ctx *ctx;
> > +     int ret;
> > +
> > +     if (ib_ctx.ib_dev_count)
> > +             goto out;
>
> Add's can run concurrently with remove and with other adds so this
> unlocked variable is racing

Hmm.. Didn't know that. Thanks, will update the patch and send.

>
> Jason



-- 

Regards
-Haris

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-08-27 15:11 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-20  3:41 [PATCH v3] RDMA/rtrs-srv: Incorporate ib_register_client into rtrs server init Md Haris Iqbal
2020-08-20  7:26 ` Leon Romanovsky
2020-08-20  8:51   ` Jinpu Wang
2020-08-20 10:43     ` Leon Romanovsky
2020-08-27 11:45 ` Jason Gunthorpe
2020-08-27 12:01   ` Haris Iqbal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).