All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Christoph Hellwig <hch@lst.de>,
	Robin Murphy <robin.murphy@arm.com>,
	Marek Szyprowski <m.szyprowski@samsung.com>,
	Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Jason Gunthorpe <jgg@ziepe.ca>,
	Chaitanya Kulkarni <chaitanyak@nvidia.com>
Cc: "Leon Romanovsky" <leonro@nvidia.com>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Jens Axboe" <axboe@kernel.dk>, "Keith Busch" <kbusch@kernel.org>,
	"Sagi Grimberg" <sagi@grimberg.me>,
	"Yishai Hadas" <yishaih@nvidia.com>,
	"Shameer Kolothum" <shameerali.kolothum.thodi@huawei.com>,
	"Kevin Tian" <kevin.tian@intel.com>,
	"Alex Williamson" <alex.williamson@redhat.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, linux-rdma@vger.kernel.org,
	iommu@lists.linux.dev, linux-nvme@lists.infradead.org,
	kvm@vger.kernel.org, linux-mm@kvack.org,
	"Bart Van Assche" <bvanassche@acm.org>,
	"Damien Le Moal" <damien.lemoal@opensource.wdc.com>,
	"Amir Goldstein" <amir73il@gmail.com>,
	"josef@toxicpanda.com" <josef@toxicpanda.com>,
	"Martin K. Petersen" <martin.petersen@oracle.com>,
	"daniel@iogearbox.net" <daniel@iogearbox.net>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"jack@suse.com" <jack@suse.com>,
	"Zhu Yanjun" <zyjzyj2000@gmail.com>
Subject: [RFC RESEND 10/16] RDMA/umem: Prevent UMEM ODP creation with SWIOTLB
Date: Tue,  5 Mar 2024 13:18:41 +0200	[thread overview]
Message-ID: <8c6d5e7db2d1a01888cc7b9b9850b05e19c75c64.1709635535.git.leon@kernel.org> (raw)
In-Reply-To: <cover.1709635535.git.leon@kernel.org>

From: Leon Romanovsky <leonro@nvidia.com>

RDMA UMEM never supported DMA addresses returned from SWIOTLB, as these
addresses are programmed into the hardware, which is not aware that
they are bounce buffers and not real ones.

Instead of silently leaving a broken system for users who are not
aware of it, let's be explicit and return an error to them.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 Documentation/core-api/dma-attributes.rst |  7 +++
 drivers/infiniband/core/umem_odp.c        | 77 +++++++++++------------
 include/linux/dma-mapping.h               |  6 ++
 kernel/dma/direct.h                       |  4 +-
 kernel/dma/mapping.c                      |  4 ++
 5 files changed, 58 insertions(+), 40 deletions(-)

diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
index 1887d92e8e92..b337ec65d506 100644
--- a/Documentation/core-api/dma-attributes.rst
+++ b/Documentation/core-api/dma-attributes.rst
@@ -130,3 +130,10 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged
 subsystem that the buffer is fully accessible at the elevated privilege
 level (and ideally inaccessible or at least read-only at the
 lesser-privileged levels).
+
+DMA_ATTR_NO_TRANSLATION
+-----------------------
+
+This attribute is used to indicate to the DMA-mapping subsystem that the
+buffer is not subject to any address translation.  This is used for devices
+that don't need buffer bouncing or fixing DMA addresses.
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 1301009a6b78..57c56000f60e 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -50,51 +50,50 @@
 static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 				   const struct mmu_interval_notifier_ops *ops)
 {
+	size_t page_size = 1UL << umem_odp->page_shift;
 	struct ib_device *dev = umem_odp->umem.ibdev;
+	size_t ndmas, npfns;
+	unsigned long start;
+	unsigned long end;
 	int ret;
 
 	umem_odp->umem.is_odp = 1;
 	mutex_init(&umem_odp->umem_mutex);
 
-	if (!umem_odp->is_implicit_odp) {
-		size_t page_size = 1UL << umem_odp->page_shift;
-		unsigned long start;
-		unsigned long end;
-		size_t ndmas, npfns;
-
-		start = ALIGN_DOWN(umem_odp->umem.address, page_size);
-		if (check_add_overflow(umem_odp->umem.address,
-				       (unsigned long)umem_odp->umem.length,
-				       &end))
-			return -EOVERFLOW;
-		end = ALIGN(end, page_size);
-		if (unlikely(end < page_size))
-			return -EOVERFLOW;
-
-		ndmas = (end - start) >> umem_odp->page_shift;
-		if (!ndmas)
-			return -EINVAL;
-
-		npfns = (end - start) >> PAGE_SHIFT;
-		umem_odp->pfn_list = kvcalloc(
-			npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
-		if (!umem_odp->pfn_list)
-			return -ENOMEM;
-
-
-		umem_odp->iova.dev = dev->dma_device;
-		umem_odp->iova.size = end - start;
-		umem_odp->iova.dir = DMA_BIDIRECTIONAL;
-		ret = ib_dma_alloc_iova(dev, &umem_odp->iova);
-		if (ret)
-			goto out_pfn_list;
-
-		ret = mmu_interval_notifier_insert(&umem_odp->notifier,
-						   umem_odp->umem.owning_mm,
-						   start, end - start, ops);
-		if (ret)
-			goto out_free_iova;
-	}
+	if (umem_odp->is_implicit_odp)
+		return 0;
+
+	start = ALIGN_DOWN(umem_odp->umem.address, page_size);
+	if (check_add_overflow(umem_odp->umem.address,
+			       (unsigned long)umem_odp->umem.length, &end))
+		return -EOVERFLOW;
+	end = ALIGN(end, page_size);
+	if (unlikely(end < page_size))
+		return -EOVERFLOW;
+
+	ndmas = (end - start) >> umem_odp->page_shift;
+	if (!ndmas)
+		return -EINVAL;
+
+	npfns = (end - start) >> PAGE_SHIFT;
+	umem_odp->pfn_list =
+		kvcalloc(npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+	if (!umem_odp->pfn_list)
+		return -ENOMEM;
+
+	umem_odp->iova.dev = dev->dma_device;
+	umem_odp->iova.size = end - start;
+	umem_odp->iova.dir = DMA_BIDIRECTIONAL;
+	umem_odp->iova.attrs = DMA_ATTR_NO_TRANSLATION;
+	ret = ib_dma_alloc_iova(dev, &umem_odp->iova);
+	if (ret)
+		goto out_pfn_list;
+
+	ret = mmu_interval_notifier_insert(&umem_odp->notifier,
+					   umem_odp->umem.owning_mm, start,
+					   end - start, ops);
+	if (ret)
+		goto out_free_iova;
 
 	return 0;
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 91cc084adb53..89945e707a9b 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -62,6 +62,12 @@
  */
 #define DMA_ATTR_PRIVILEGED		(1UL << 9)
 
+/*
+ * DMA_ATTR_NO_TRANSLATION: used to indicate that the buffer should not be mapped
+ * through address translation.
+ */
+#define DMA_ATTR_NO_TRANSLATION		(1UL << 10)
+
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
  * be given to a device to use as a DMA source or target.  It is specific to a
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 1c30e1cd607a..1c9ec204c999 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -92,6 +92,8 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 	if (is_swiotlb_force_bounce(dev)) {
 		if (is_pci_p2pdma_page(page))
 			return DMA_MAPPING_ERROR;
+		if (attrs & DMA_ATTR_NO_TRANSLATION)
+			return DMA_MAPPING_ERROR;
 		return swiotlb_map(dev, phys, size, dir, attrs);
 	}
 
@@ -99,7 +101,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 	    dma_kmalloc_needs_bounce(dev, size, dir)) {
 		if (is_pci_p2pdma_page(page))
 			return DMA_MAPPING_ERROR;
-		if (is_swiotlb_active(dev))
+		if (is_swiotlb_active(dev) && !(attrs & DMA_ATTR_NO_TRANSLATION))
 			return swiotlb_map(dev, phys, size, dir, attrs);
 
 		dev_WARN_ONCE(dev, 1,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index f989c64622c2..49b1fde510c5 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -188,6 +188,10 @@ int dma_alloc_iova(struct dma_iova_attrs *iova)
 	struct device *dev = iova->dev;
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
+	if (dma_map_direct(dev, ops) && is_swiotlb_force_bounce(dev) &&
+	    iova->attrs & DMA_ATTR_NO_TRANSLATION)
+		return -EOPNOTSUPP;
+
 	if (dma_map_direct(dev, ops) || !ops->alloc_iova) {
 		iova->addr = 0;
 		return 0;
-- 
2.44.0


  parent reply	other threads:[~2024-03-05 11:19 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-05 11:18 [RFC RESEND 00/16] Split IOMMU DMA mapping operation to two steps Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 01/16] mm/hmm: let users to tag specific PFNs Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 02/16] dma-mapping: provide an interface to allocate IOVA Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 03/16] dma-mapping: provide callbacks to link/unlink pages to specific IOVA Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 04/16] iommu/dma: Provide an interface to allow preallocate IOVA Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 05/16] iommu/dma: Prepare map/unmap page functions to receive IOVA Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 06/16] iommu/dma: Implement link/unlink page callbacks Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 07/16] RDMA/umem: Preallocate and cache IOVA for UMEM ODP Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 08/16] RDMA/umem: Store ODP access mask information in PFN Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 09/16] RDMA/core: Separate DMA mapping to caching IOVA and page linkage Leon Romanovsky
2024-03-05 11:18 ` Leon Romanovsky [this message]
2024-03-05 11:18 ` [RFC RESEND 11/16] vfio/mlx5: Explicitly use number of pages instead of allocated length Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 12/16] vfio/mlx5: Rewrite create mkey flow to allow better code reuse Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 13/16] vfio/mlx5: Explicitly store page list Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 14/16] vfio/mlx5: Convert vfio to use DMA link API Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 15/16] block: add dma_link_range() based API Leon Romanovsky
2024-03-05 11:18 ` [RFC RESEND 16/16] nvme-pci: use blk_rq_dma_map() for NVMe SGL Leon Romanovsky
2024-03-05 15:51   ` Keith Busch
2024-03-05 16:08     ` Jens Axboe
2024-03-05 16:39       ` Chaitanya Kulkarni
2024-03-05 16:46         ` Chaitanya Kulkarni
2024-03-06 14:33     ` Christoph Hellwig
2024-03-06 15:05       ` Jason Gunthorpe
2024-03-06 16:14         ` Christoph Hellwig
2024-05-03 14:41   ` Zhu Yanjun
2024-05-05 13:23     ` Leon Romanovsky
2024-05-06  7:25       ` Zhu Yanjun
2024-03-05 12:05 ` [RFC RESEND 00/16] Split IOMMU DMA mapping operation to two steps Robin Murphy
2024-03-05 12:29   ` Leon Romanovsky
2024-03-06 14:44     ` Christoph Hellwig
2024-03-06 15:43       ` Jason Gunthorpe
2024-03-06 16:20         ` Christoph Hellwig
2024-03-06 17:44           ` Jason Gunthorpe
2024-03-06 22:14             ` Christoph Hellwig
2024-03-07  0:00               ` Jason Gunthorpe
2024-03-07 15:05                 ` Christoph Hellwig
2024-03-07 21:01                   ` Jason Gunthorpe
2024-03-08 16:49                     ` Christoph Hellwig
2024-03-08 20:23                       ` Jason Gunthorpe
2024-03-09 16:14                         ` Christoph Hellwig
2024-03-10  9:35                           ` Leon Romanovsky
2024-03-12 21:28                             ` Christoph Hellwig
2024-03-13  7:46                               ` Leon Romanovsky
2024-03-13 21:44                                 ` Christoph Hellwig
2024-03-19 15:36                           ` Jason Gunthorpe
2024-03-20  8:55                             ` Leon Romanovsky
2024-03-21 22:40                               ` Christoph Hellwig
2024-03-22 17:46                                 ` Leon Romanovsky
2024-03-24 23:16                                   ` Christoph Hellwig
2024-03-21 22:39                             ` Christoph Hellwig
2024-03-22 18:43                               ` Jason Gunthorpe
2024-03-24 23:22                                 ` Christoph Hellwig
2024-03-27 17:14                                   ` Jason Gunthorpe
2024-03-07  6:01 ` Zhu Yanjun
2024-04-09 20:39   ` Zhu Yanjun
2024-05-02 23:32 ` Zeng, Oak
2024-05-03 11:57   ` Zhu Yanjun
2024-05-03 16:42   ` Jason Gunthorpe
2024-05-03 20:59     ` Zeng, Oak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8c6d5e7db2d1a01888cc7b9b9850b05e19c75c64.1709635535.git.leon@kernel.org \
    --to=leon@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=alex.williamson@redhat.com \
    --cc=amir73il@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=chaitanyak@nvidia.com \
    --cc=corbet@lwn.net \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=dan.j.williams@intel.com \
    --cc=daniel@iogearbox.net \
    --cc=hch@lst.de \
    --cc=iommu@lists.linux.dev \
    --cc=jack@suse.com \
    --cc=jgg@ziepe.ca \
    --cc=jglisse@redhat.com \
    --cc=joro@8bytes.org \
    --cc=josef@toxicpanda.com \
    --cc=kbusch@kernel.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=m.szyprowski@samsung.com \
    --cc=martin.petersen@oracle.com \
    --cc=robin.murphy@arm.com \
    --cc=sagi@grimberg.me \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=will@kernel.org \
    --cc=yishaih@nvidia.com \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.