* [PATCH RESEND rdma-rc] RDMA/mlx5: Add dummy umem to IB_MR_TYPE_DM
@ 2021-10-19 10:23 Leon Romanovsky
2021-10-25 17:31 ` Jason Gunthorpe
0 siblings, 1 reply; 3+ messages in thread
From: Leon Romanovsky @ 2021-10-19 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Alaa Hleihel, linux-kernel, linux-rdma
From: Alaa Hleihel <alaa@nvidia.com>
After the cited patch, and for the case of IB_MR_TYPE_DM that doesn't
have a umem (even though it is a user MR), function mlx5_free_priv_descs()
will think that it's a kernel MR, leading to wrongly accessing mr->descs
that will get wrong values in the union which leads to attempting to
release resources that were not allocated in the first place.
For example:
DMA-API: mlx5_core 0000:08:00.1: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=0 bytes]
WARNING: CPU: 8 PID: 1021 at kernel/dma/debug.c:961 check_unmap+0x54f/0x8b0
RIP: 0010:check_unmap+0x54f/0x8b0
Call Trace:
debug_dma_unmap_page+0x57/0x60
mlx5_free_priv_descs+0x57/0x70 [mlx5_ib]
mlx5_ib_dereg_mr+0x1fb/0x3d0 [mlx5_ib]
ib_dereg_mr_user+0x60/0x140 [ib_core]
uverbs_destroy_uobject+0x59/0x210 [ib_uverbs]
uobj_destroy+0x3f/0x80 [ib_uverbs]
ib_uverbs_cmd_verbs+0x435/0xd10 [ib_uverbs]
? uverbs_finalize_object+0x50/0x50 [ib_uverbs]
? lock_acquire+0xc4/0x2e0
? lock_acquired+0x12/0x380
? lock_acquire+0xc4/0x2e0
? lock_acquire+0xc4/0x2e0
? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
? lock_release+0x28a/0x400
ib_uverbs_ioctl+0xc0/0x140 [ib_uverbs]
? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
__x64_sys_ioctl+0x7f/0xb0
do_syscall_64+0x38/0x90
Fix it by adding a dummy umem to IB_MR_TYPE_DM MRs.
Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr")
Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
RESEND: https://lore.kernel.org/all/9c6478b70dc23cfec3a7bfc345c30ff817e7e799.1631660866.git.leonro@nvidia.com
Our request to drop that original patch was because the mr->umem pointer is checked
in rereg flow for the DM MRs with expectation to have NULL there. However DM is
blocked for the rereg path in the commit 5ccbf63f87a3 ("IB/uverbs: Prevent
reregistration of DM_MR to regular MR"), and the checks in mlx5_ib are redundant.
Thanks
---
drivers/infiniband/core/umem.c | 21 +++++++++++++++++++++
drivers/infiniband/hw/mlx5/mr.c | 5 +++++
include/rdma/ib_umem.h | 5 +++++
3 files changed, 31 insertions(+)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 86d479772fbc..a2f9c922bdd9 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -260,6 +260,27 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
}
EXPORT_SYMBOL(ib_umem_get);
+/**
+ * ib_umem_get_dummy - Create an empty umem
+ *
+ * @device: IB device to connect UMEM
+ */
+struct ib_umem *ib_umem_get_dummy(struct ib_device *device)
+{
+ struct ib_umem *umem;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->ibdev = device;
+ umem->owning_mm = current->mm;
+ mmgrab(umem->owning_mm);
+
+ return umem;
+}
+EXPORT_SYMBOL(ib_umem_get_dummy);
+
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3be36ebbf67a..6fbc281a8881 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1389,6 +1389,11 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
kfree(in);
set_mr_fields(dev, mr, length, acc);
+ mr->umem = ib_umem_get_dummy(&dev->ib_dev);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err_free;
+ }
return &mr->ibmr;
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 5ae9dff74dac..fd20b1610050 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -98,6 +98,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
size_t size, int access);
+struct ib_umem *ib_umem_get_dummy(struct ib_device *device);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
@@ -153,6 +154,10 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device,
{
return ERR_PTR(-EOPNOTSUPP);
}
+static struct ib_umem *ib_umem_get_dummy(struct ib_device *device)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
static inline void ib_umem_release(struct ib_umem *umem) { }
static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length) {
--
2.31.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH RESEND rdma-rc] RDMA/mlx5: Add dummy umem to IB_MR_TYPE_DM
2021-10-19 10:23 [PATCH RESEND rdma-rc] RDMA/mlx5: Add dummy umem to IB_MR_TYPE_DM Leon Romanovsky
@ 2021-10-25 17:31 ` Jason Gunthorpe
2021-10-26 11:23 ` Leon Romanovsky
0 siblings, 1 reply; 3+ messages in thread
From: Jason Gunthorpe @ 2021-10-25 17:31 UTC (permalink / raw)
To: Leon Romanovsky; +Cc: Doug Ledford, Alaa Hleihel, linux-kernel, linux-rdma
On Tue, Oct 19, 2021 at 01:23:13PM +0300, Leon Romanovsky wrote:
> From: Alaa Hleihel <alaa@nvidia.com>
>
> After the cited patch, and for the case of IB_MR_TYPE_DM that doesn't
> have a umem (even though it is a user MR), function mlx5_free_priv_descs()
> will think that it's a kernel MR, leading to wrongly accessing mr->descs
> that will get wrong values in the union which leads to attempting to
> release resources that were not allocated in the first place.
>
> For example:
> DMA-API: mlx5_core 0000:08:00.1: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=0 bytes]
> WARNING: CPU: 8 PID: 1021 at kernel/dma/debug.c:961 check_unmap+0x54f/0x8b0
> RIP: 0010:check_unmap+0x54f/0x8b0
> Call Trace:
> debug_dma_unmap_page+0x57/0x60
> mlx5_free_priv_descs+0x57/0x70 [mlx5_ib]
> mlx5_ib_dereg_mr+0x1fb/0x3d0 [mlx5_ib]
> ib_dereg_mr_user+0x60/0x140 [ib_core]
> uverbs_destroy_uobject+0x59/0x210 [ib_uverbs]
> uobj_destroy+0x3f/0x80 [ib_uverbs]
> ib_uverbs_cmd_verbs+0x435/0xd10 [ib_uverbs]
> ? uverbs_finalize_object+0x50/0x50 [ib_uverbs]
> ? lock_acquire+0xc4/0x2e0
> ? lock_acquired+0x12/0x380
> ? lock_acquire+0xc4/0x2e0
> ? lock_acquire+0xc4/0x2e0
> ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
> ? lock_release+0x28a/0x400
> ib_uverbs_ioctl+0xc0/0x140 [ib_uverbs]
> ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
> __x64_sys_ioctl+0x7f/0xb0
> do_syscall_64+0x38/0x90
>
> Fix it by adding a dummy umem to IB_MR_TYPE_DM MRs.
>
> Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr")
> Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> RESEND: https://lore.kernel.org/all/9c6478b70dc23cfec3a7bfc345c30ff817e7e799.1631660866.git.leonro@nvidia.com
>
> Our request to drop that original patch was because the mr->umem pointer is checked
> in rereg flow for the DM MRs with expectation to have NULL there. However DM is
> blocked for the rereg path in the commit 5ccbf63f87a3 ("IB/uverbs: Prevent
> reregistration of DM_MR to regular MR"), and the checks in mlx5_ib are redundant.
That logic in the core code is bogus and should be deleted.
It is perfectly fine to use rereg to change the access flags on a DM
MR, and mlx5 now implements that.
So let's not go down a path that blocks it.
Like this instead:
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e636e954f6bf2a..4a7a56ed740b9b 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -664,7 +664,6 @@ struct mlx5_ib_mr {
/* User MR data */
struct mlx5_cache_ent *cache_ent;
- struct ib_umem *umem;
/* This is zero'd when the MR is allocated */
union {
@@ -676,7 +675,7 @@ struct mlx5_ib_mr {
struct list_head list;
};
- /* Used only by kernel MRs (umem == NULL) */
+ /* Used only by kernel MRs */
struct {
void *descs;
void *descs_alloc;
@@ -697,8 +696,9 @@ struct mlx5_ib_mr {
int data_length;
};
- /* Used only by User MRs (umem != NULL) */
+ /* Used only by User MRs */
struct {
+ struct ib_umem *umem;
unsigned int page_shift;
/* Current access_flags */
int access_flags;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 221f0949794e35..997d133d00369d 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1904,19 +1904,19 @@ mlx5_alloc_priv_descs(struct ib_device *device,
return ret;
}
-static void
-mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
+static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- if (!mr->umem && mr->descs) {
- struct ib_device *device = mr->ibmr.device;
- int size = mr->max_descs * mr->desc_size;
- struct mlx5_ib_dev *dev = to_mdev(device);
+ struct ib_device *device = mr->ibmr.device;
+ int size = mr->max_descs * mr->desc_size;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
- DMA_TO_DEVICE);
- kfree(mr->descs_alloc);
- mr->descs = NULL;
- }
+ if (!mr->descs)
+ return;
+
+ dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+ DMA_TO_DEVICE);
+ kfree(mr->descs_alloc);
+ mr->descs = NULL;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
@@ -1978,7 +1978,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
return rc;
}
- if (mr->umem) {
+ if (udata && mr->umem) {
bool is_odp = is_odp_mr(mr);
if (!is_odp)
@@ -1992,7 +1992,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (mr->cache_ent) {
mlx5_mr_cache_free(dev, mr);
} else {
- mlx5_free_priv_descs(mr);
+ if (!udata)
+ mlx5_free_priv_descs(mr);
kfree(mr);
}
return 0;
@@ -2079,7 +2080,6 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
if (err)
goto err_free_in;
- mr->umem = NULL;
kfree(in);
return mr;
@@ -2206,7 +2206,6 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
}
mr->ibmr.device = pd->device;
- mr->umem = NULL;
switch (mr_type) {
case IB_MR_TYPE_MEM_REG:
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH RESEND rdma-rc] RDMA/mlx5: Add dummy umem to IB_MR_TYPE_DM
2021-10-25 17:31 ` Jason Gunthorpe
@ 2021-10-26 11:23 ` Leon Romanovsky
0 siblings, 0 replies; 3+ messages in thread
From: Leon Romanovsky @ 2021-10-26 11:23 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Doug Ledford, Alaa Hleihel, linux-kernel, linux-rdma
On Mon, Oct 25, 2021 at 02:31:50PM -0300, Jason Gunthorpe wrote:
> On Tue, Oct 19, 2021 at 01:23:13PM +0300, Leon Romanovsky wrote:
> > From: Alaa Hleihel <alaa@nvidia.com>
> >
> > After the cited patch, and for the case of IB_MR_TYPE_DM that doesn't
> > have a umem (even though it is a user MR), function mlx5_free_priv_descs()
> > will think that it's a kernel MR, leading to wrongly accessing mr->descs
> > that will get wrong values in the union which leads to attempting to
> > release resources that were not allocated in the first place.
> >
> > For example:
> > DMA-API: mlx5_core 0000:08:00.1: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=0 bytes]
> > WARNING: CPU: 8 PID: 1021 at kernel/dma/debug.c:961 check_unmap+0x54f/0x8b0
> > RIP: 0010:check_unmap+0x54f/0x8b0
> > Call Trace:
> > debug_dma_unmap_page+0x57/0x60
> > mlx5_free_priv_descs+0x57/0x70 [mlx5_ib]
> > mlx5_ib_dereg_mr+0x1fb/0x3d0 [mlx5_ib]
> > ib_dereg_mr_user+0x60/0x140 [ib_core]
> > uverbs_destroy_uobject+0x59/0x210 [ib_uverbs]
> > uobj_destroy+0x3f/0x80 [ib_uverbs]
> > ib_uverbs_cmd_verbs+0x435/0xd10 [ib_uverbs]
> > ? uverbs_finalize_object+0x50/0x50 [ib_uverbs]
> > ? lock_acquire+0xc4/0x2e0
> > ? lock_acquired+0x12/0x380
> > ? lock_acquire+0xc4/0x2e0
> > ? lock_acquire+0xc4/0x2e0
> > ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
> > ? lock_release+0x28a/0x400
> > ib_uverbs_ioctl+0xc0/0x140 [ib_uverbs]
> > ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
> > __x64_sys_ioctl+0x7f/0xb0
> > do_syscall_64+0x38/0x90
> >
> > Fix it by adding a dummy umem to IB_MR_TYPE_DM MRs.
> >
> > Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr")
> > Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > RESEND: https://lore.kernel.org/all/9c6478b70dc23cfec3a7bfc345c30ff817e7e799.1631660866.git.leonro@nvidia.com
> >
> > Our request to drop that original patch was because mr-->umem pointer is checked
> > in rereg flow for the DM MRs with expectation to have NULL there. However DM is
> > blocked for the rereg path in the commit 5ccbf63f87a3 ("IB/uverbs: Prevent
> > reregistration of DM_MR to regular MR"), and the checks in mlx5_ib are redundant.
>
> That logic in the core code is bogus and should be deleted.
>
> It is perfectly fine to use rereg to change the access flags on a DM
> MR, and mlx5 now implements that.
>
> So let's not go down a path that blocks it.
>
> Like this instead:
Thanks, let's give a try.
>
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index e636e954f6bf2a..4a7a56ed740b9b 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -664,7 +664,6 @@ struct mlx5_ib_mr {
>
> /* User MR data */
> struct mlx5_cache_ent *cache_ent;
> - struct ib_umem *umem;
>
> /* This is zero'd when the MR is allocated */
> union {
> @@ -676,7 +675,7 @@ struct mlx5_ib_mr {
> struct list_head list;
> };
>
> - /* Used only by kernel MRs (umem == NULL) */
> + /* Used only by kernel MRs */
> struct {
> void *descs;
> void *descs_alloc;
> @@ -697,8 +696,9 @@ struct mlx5_ib_mr {
> int data_length;
> };
>
> - /* Used only by User MRs (umem != NULL) */
> + /* Used only by User MRs */
> struct {
> + struct ib_umem *umem;
> unsigned int page_shift;
> /* Current access_flags */
> int access_flags;
> diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> index 221f0949794e35..997d133d00369d 100644
> --- a/drivers/infiniband/hw/mlx5/mr.c
> +++ b/drivers/infiniband/hw/mlx5/mr.c
> @@ -1904,19 +1904,19 @@ mlx5_alloc_priv_descs(struct ib_device *device,
> return ret;
> }
>
> -static void
> -mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
> +static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
> {
> - if (!mr->umem && mr->descs) {
> - struct ib_device *device = mr->ibmr.device;
> - int size = mr->max_descs * mr->desc_size;
> - struct mlx5_ib_dev *dev = to_mdev(device);
> + struct ib_device *device = mr->ibmr.device;
> + int size = mr->max_descs * mr->desc_size;
> + struct mlx5_ib_dev *dev = to_mdev(device);
>
> - dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
> - DMA_TO_DEVICE);
> - kfree(mr->descs_alloc);
> - mr->descs = NULL;
> - }
> + if (!mr->descs)
> + return;
> +
> + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
> + DMA_TO_DEVICE);
> + kfree(mr->descs_alloc);
> + mr->descs = NULL;
> }
>
> int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
> @@ -1978,7 +1978,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
> return rc;
> }
>
> - if (mr->umem) {
> + if (udata && mr->umem) {
> bool is_odp = is_odp_mr(mr);
>
> if (!is_odp)
> @@ -1992,7 +1992,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
> if (mr->cache_ent) {
> mlx5_mr_cache_free(dev, mr);
> } else {
> - mlx5_free_priv_descs(mr);
> + if (!udata)
> + mlx5_free_priv_descs(mr);
> kfree(mr);
> }
> return 0;
> @@ -2079,7 +2080,6 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
> if (err)
> goto err_free_in;
>
> - mr->umem = NULL;
> kfree(in);
>
> return mr;
> @@ -2206,7 +2206,6 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
> }
>
> mr->ibmr.device = pd->device;
> - mr->umem = NULL;
>
> switch (mr_type) {
> case IB_MR_TYPE_MEM_REG:
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-10-26 11:23 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-19 10:23 [PATCH RESEND rdma-rc] RDMA/mlx5: Add dummy umem to IB_MR_TYPE_DM Leon Romanovsky
2021-10-25 17:31 ` Jason Gunthorpe
2021-10-26 11:23 ` Leon Romanovsky
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).