All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] RDMA: Fix software RDMA drivers for dma mapping error
@ 2020-10-30  9:38 Parav Pandit
  2020-10-30  9:48 ` Guoqing Jiang
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Parav Pandit @ 2020-10-30  9:38 UTC (permalink / raw)
  To: dennis.dalessandro, mike.marciniszyn, dledford, jgg, yanjunz,
	bmt, linux-rdma
  Cc: hch, Parav Pandit, syzbot+34dc2fea3478e659af01

The commit cited in the Fixes tag stopped setting the dma_mask of the
ib_device. Commit [1] made dma_mask a mandatory field that must be set up
even for dma_virt_ops based DMA devices, which led to the call trace below.

Fix it by setting a full-width DMA mask (64-bit on CONFIG_64BIT kernels,
32-bit otherwise) for software based RDMA devices.

WARNING: CPU: 1 PID: 8488 at kernel/dma/mapping.c:149
dma_map_page_attrs+0x493/0x700 kernel/dma/mapping.c:149 Modules linked in:
CPU: 1 PID: 8488 Comm: syz-executor144 Not tainted 5.9.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
RIP: 0010:dma_map_page_attrs+0x493/0x700 kernel/dma/mapping.c:149
Trace:
 dma_map_single_attrs include/linux/dma-mapping.h:279 [inline]
ib_dma_map_single include/rdma/ib_verbs.h:3967 [inline]
 ib_mad_post_receive_mads+0x23f/0xd60
drivers/infiniband/core/mad.c:2715
 ib_mad_port_start drivers/infiniband/core/mad.c:2862 [inline]
ib_mad_port_open drivers/infiniband/core/mad.c:3016 [inline]
 ib_mad_init_device+0x72b/0x1400 drivers/infiniband/core/mad.c:3092
 add_client_context+0x405/0x5e0 drivers/infiniband/core/device.c:680
 enable_device_and_get+0x1d5/0x3c0
drivers/infiniband/core/device.c:1301
 ib_register_device drivers/infiniband/core/device.c:1376 [inline]
 ib_register_device+0x7a7/0xa40 drivers/infiniband/core/device.c:1335
 rxe_register_device+0x46d/0x570
drivers/infiniband/sw/rxe/rxe_verbs.c:1182
 rxe_add+0x12fe/0x16d0 drivers/infiniband/sw/rxe/rxe.c:247
 rxe_net_add+0x8c/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:507
 rxe_newlink drivers/infiniband/sw/rxe/rxe.c:269 [inline]
 rxe_newlink+0xb7/0xe0 drivers/infiniband/sw/rxe/rxe.c:250
 nldev_newlink+0x30e/0x540 drivers/infiniband/core/nldev.c:1555
 rdma_nl_rcv_msg+0x367/0x690 drivers/infiniband/core/netlink.c:195
 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
 rdma_nl_rcv+0x2f2/0x440 drivers/infiniband/core/netlink.c:259
 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330
 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1919
sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:671
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x443699

[1] commit f959dcd6ddfd ("dma-direct: Fix potential NULL pointer dereference")

Reported-by: syzbot+34dc2fea3478e659af01@syzkaller.appspotmail.com
Fixes: e0477b34d9d1 ("RDMA: Explicitly pass in the dma_device to ib_register_device")
Signed-off-by: Parav Pandit <parav@nvidia.com>
---
 drivers/infiniband/sw/rdmavt/vt.c     | 7 +++++--
 drivers/infiniband/sw/rxe/rxe_verbs.c | 6 +++++-
 drivers/infiniband/sw/siw/siw_main.c  | 7 +++++--
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 43fbc4e54edf..5bd817490b1f 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -525,6 +525,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 int rvt_register_device(struct rvt_dev_info *rdi)
 {
 	int ret = 0, i;
+	u64 dma_mask;
 
 	if (!rdi)
 		return -EINVAL;
@@ -581,8 +582,10 @@ int rvt_register_device(struct rvt_dev_info *rdi)
 
 	/* DMA Operations */
 	rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
-	dma_set_coherent_mask(&rdi->ibdev.dev,
-			      rdi->ibdev.dev.parent->coherent_dma_mask);
+	dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
+	ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask);
+	if (ret)
+		goto bail_wss;
 
 	/* Protection Domain */
 	spin_lock_init(&rdi->n_pds_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 7652d53af2c1..50ad3dded786 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1128,6 +1128,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 	int err;
 	struct ib_device *dev = &rxe->ib_dev;
 	struct crypto_shash *tfm;
+	u64 dma_mask;
 
 	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
 
@@ -1140,7 +1141,10 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 			    rxe->ndev->dev_addr);
 	dev->dev.dma_parms = &rxe->dma_parms;
 	dma_set_max_seg_size(&dev->dev, UINT_MAX);
-	dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev));
+	dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
+	err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask);
+	if (err)
+		return err;
 
 	dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
 
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index e49faefdee92..6fe120187238 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -306,6 +306,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
 	struct siw_device *sdev = NULL;
 	struct ib_device *base_dev;
 	struct device *parent = netdev->dev.parent;
+	u64 dma_mask;
 	int rv;
 
 	if (!parent) {
@@ -360,8 +361,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
 	base_dev->dev.parent = parent;
 	base_dev->dev.dma_parms = &sdev->dma_parms;
 	dma_set_max_seg_size(&base_dev->dev, UINT_MAX);
-	dma_set_coherent_mask(&base_dev->dev,
-			      dma_get_required_mask(&base_dev->dev));
+	dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
+	if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask))
+		goto error;
+
 	base_dev->num_comp_vectors = num_possible_cpus();
 
 	xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread
* RE: WARNING in dma_map_page_attrs
@ 2020-10-27 12:52 Parav Pandit
  2020-10-28  8:03   ` kernel test robot
  0 siblings, 1 reply; 14+ messages in thread
From: Parav Pandit @ 2020-10-27 12:52 UTC (permalink / raw)
  To: hch
  Cc: Jakub Kicinski, syzbot, christian.koenig, dri-devel, iommu,
	linaro-mm-sig-owner, linaro-mm-sig, linux-kernel, linux-media,
	m.szyprowski, netdev, robin.murphy, sumit.semwal, syzkaller-bugs,
	linux-rdma


> From: hch@lst.de <hch@lst.de>
> Sent: Tuesday, October 27, 2020 1:41 PM
> 
> On Mon, Oct 26, 2020 at 05:23:48AM +0000, Parav Pandit wrote:
> > Hi Christoph,
> >
> > > From: Jakub Kicinski <kuba@kernel.org>
> > > Sent: Saturday, October 24, 2020 11:45 PM
> > >
> > > CC: rdma, looks like rdma from the stack trace
> > >
> > > On Fri, 23 Oct 2020 20:07:17 -0700 syzbot wrote:
> > > > syzbot has found a reproducer for the following issue on:
> > > >
> > > > HEAD commit:    3cb12d27 Merge tag 'net-5.10-rc1' of
> git://git.kernel.org/..
> >
> > In [1] you mentioned that dma_mask should not be set for dma_virt_ops.
> > So patch [2] removed it.
> >
> > But a check to validate the dma mask for all dma_ops was added in [3].
> >
> > What is the right way? Did I misunderstand your comment about
> dma_mask in [1]?
> 
> No, I did not say we don't need the mask.  I said copying over the various
> dma-related fields from the parent is bogus.
> 
> I think rxe (and the other drivers/infiniband/sw drivers) need a simple
> dma_coerce_mask_and_coherent and nothing else.

I see. Does below fix make sense?
Is DMA_MASK_NONE correct?

From cfad78c35788b4ff604abedd96559500c5fd2a72 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 27 Oct 2020 14:20:07 +0200
Subject: [PATCH] RDMA: Fix software RDMA drivers for dma mapping error

The commit cited in the Fixes tag stopped setting the dma_mask of the
ib_device. Commit [1] made dma_mask a mandatory field that must be set up
even for dma_virt_ops based DMA devices.

Fix it by setting an empty DMA mask (DMA_MASK_NONE) for software based RDMA devices.

[1] commit: f959dcd6ddfd2 ("dma-direct: Fix potential NULL pointer dereference")

Reported-by: syzbot+34dc2fea3478e659af01@syzkaller.appspotmail.com
Fixes: e0477b34d9d1 ("RDMA: Explicitly pass in the dma_device to ib_register_device")
Signed-off-by: Parav Pandit <parav@nvidia.com>
---
 drivers/infiniband/sw/rdmavt/vt.c     | 5 +++--
 drivers/infiniband/sw/rxe/rxe_verbs.c | 4 +++-
 drivers/infiniband/sw/siw/siw_main.c  | 5 +++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 52218684ad4a..1b456f4d4fcf 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -580,8 +580,9 @@ int rvt_register_device(struct rvt_dev_info *rdi)
 
 	/* DMA Operations */
 	rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
-	dma_set_coherent_mask(&rdi->ibdev.dev,
-			      rdi->ibdev.dev.parent->coherent_dma_mask);
+	ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, DMA_MASK_NONE);
+	if (ret)
+		goto bail_wss;
 
 	/* Protection Domain */
 	spin_lock_init(&rdi->n_pds_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 1fc022362fbe..357787688293 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1130,7 +1130,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 			    rxe->ndev->dev_addr);
 	dev->dev.dma_parms = &rxe->dma_parms;
 	dma_set_max_seg_size(&dev->dev, UINT_MAX);
-	dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev));
+	err = dma_coerce_mask_and_coherent(&dev->dev, DMA_MASK_NONE);
+	if (err)
+		return err;
 
 	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
 	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index ca8bc7296867..d3dc50a42dab 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -384,8 +384,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
 	base_dev->dev.parent = parent;
 	base_dev->dev.dma_parms = &sdev->dma_parms;
 	dma_set_max_seg_size(&base_dev->dev, UINT_MAX);
-	dma_set_coherent_mask(&base_dev->dev,
-			      dma_get_required_mask(&base_dev->dev));
+	if (dma_coerce_mask_and_coherent(&base_dev->dev, DMA_MASK_NONE))
+		goto error;
+
 	base_dev->num_comp_vectors = num_possible_cpus();
 
 	xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-11-02 19:20 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-30  9:38 [PATCH] RDMA: Fix software RDMA drivers for dma mapping error Parav Pandit
2020-10-30  9:48 ` Guoqing Jiang
2020-10-30 12:04 ` Dennis Dalessandro
2020-10-30 12:17 ` Jason Gunthorpe
2020-10-30 12:19   ` Dennis Dalessandro
2020-10-30 12:45     ` Parav Pandit
2020-10-30 15:01       ` hch
2020-10-30 15:01 ` Christoph Hellwig
2020-11-01  4:28 ` Zhu Yanjun
2020-11-01  9:20   ` Zhu Yanjun
2020-11-02 19:20 ` Jason Gunthorpe
  -- strict thread matches above, loose matches on Subject: below --
2020-10-27 12:52 WARNING in dma_map_page_attrs Parav Pandit
2020-10-28  8:03 ` [PATCH] RDMA: Fix software RDMA drivers for dma mapping error kernel test robot
2020-10-28  8:03   ` kernel test robot
2020-10-28  8:03   ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.