From: Chuck Lever <cel@kernel.org>
To: unlisted-recipients:; (no To-header on input)
Cc: iommu@lists.linux.dev, linux-rdma@vger.kernel.org,
	Chuck Lever <chuck.lever@oracle.com>
Subject: [PATCH RFC 8/9] iommu/dma: Support DMA-mapping a bio_vec array
Date: Thu, 19 Oct 2023 11:26:24 -0400	[thread overview]
Message-ID: <169772918473.5232.6022085226786774578.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <169772852492.5232.17148564580779995849.stgit@klimt.1015granger.net>

From: Chuck Lever <chuck.lever@oracle.com>

Cc: iommu@lists.linux.dev
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 drivers/iommu/dma-iommu.c |  369 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/iommu.c     |   58 +++++++
 include/linux/iommu.h     |    4 
 3 files changed, 431 insertions(+)
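
A minimal usage sketch, for review context only (not part of the patch): a
hypothetical direct IOMMU-API consumer driving the new iommu_map_bvecs()
entry point added below. The domain, iova, bvecs and nents values, and the
surrounding error handling, are assumed to be set up by the caller.

	ssize_t mapped;

	/* Map the whole bio_vec array into one contiguous IOVA range */
	mapped = iommu_map_bvecs(domain, iova, bvecs, nents,
				 IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
	if (mapped < 0)
		return mapped;

	/* ... the device may now access [iova, iova + mapped) ... */

	/* Tear the mapping down when the I/O completes */
	iommu_unmap(domain, iova, mapped);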

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4b1a88f514c9..5ed15eac9a4a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -554,6 +554,34 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 	return false;
 }
 
+static bool dev_use_bvecs_swiotlb(struct device *dev, struct bio_vec *bvecs,
+				  int nents, enum dma_data_direction dir)
+{
+	struct bio_vec *bv;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_SWIOTLB))
+		return false;
+
+	if (dev_is_untrusted(dev))
+		return true;
+
+	/*
+	 * If kmalloc() buffers are not DMA-safe for this device and
+	 * direction, check the individual lengths in the bio_vec array.
+	 * If any element is deemed unsafe, use the swiotlb for bouncing.
+	 */
+	if (!dma_kmalloc_safe(dev, dir)) {
+		for (i = 0; i < nents; i++) {
+			bv = &bvecs[i];
+			if (!dma_kmalloc_size_aligned(bv->bv_len))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -1026,6 +1054,49 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
 }
 
+static void iommu_dma_sync_bvecs_for_cpu(struct device *dev,
+		struct bio_vec *bvecs, int nelems,
+		enum dma_data_direction dir)
+{
+	struct bio_vec *bv;
+	int i;
+
+	if (bv_dma_is_swiotlb(bvecs)) {
+		for (i = 0; i < nelems; i++) {
+			bv = &bvecs[i];
+			iommu_dma_sync_single_for_cpu(dev, bv_dma_address(bv),
+						      bv->bv_len, dir);
+		}
+	} else if (!dev_is_dma_coherent(dev)) {
+		for (i = 0; i < nelems; i++) {
+			bv = &bvecs[i];
+			arch_sync_dma_for_cpu(bv_phys(bv), bv->bv_len, dir);
+		}
+	}
+}
+
+static void iommu_dma_sync_bvecs_for_device(struct device *dev,
+		struct bio_vec *bvecs, int nelems,
+		enum dma_data_direction dir)
+{
+	struct bio_vec *bv;
+	int i;
+
+	if (bv_dma_is_swiotlb(bvecs)) {
+		for (i = 0; i < nelems; i++) {
+			bv = &bvecs[i];
+			iommu_dma_sync_single_for_device(dev,
+							 bv_dma_address(bv),
+							 bv->bv_len, dir);
+		}
+	} else if (!dev_is_dma_coherent(dev)) {
+		for (i = 0; i < nelems; i++) {
+			bv = &bvecs[i];
+			arch_sync_dma_for_device(bv_phys(bv), bv->bv_len, dir);
+		}
+	}
+}
+
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)
@@ -1405,6 +1476,300 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 		__iommu_dma_unmap(dev, start, end - start);
 }
 
+/*
+ * Prepare a successfully-mapped bio_vec array to give back to the caller.
+ *
+ * At this point the elements are already laid out by iommu_dma_map_bvecs()
+ * to avoid individually crossing any boundaries, so we merely need to check
+ * an element's start address to avoid concatenating across one.
+ */
+static int __finalise_bvecs(struct device *dev, struct bio_vec *bvecs,
+		int nents, dma_addr_t dma_addr)
+{
+	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
+	unsigned long seg_mask = dma_get_seg_boundary(dev);
+	struct bio_vec *cur = bvecs;
+	int i, count = 0;
+
+	for (i = 0; i < nents; i++) {
+		struct bio_vec *bv = &bvecs[i];
+
+		/* Restore this segment's original unaligned fields first */
+		dma_addr_t s_dma_addr = bv_dma_address(bv);
+		unsigned int s_iova_off = bv_dma_address(bv);
+		unsigned int s_length = bv_dma_len(bv);
+		unsigned int s_iova_len = bv->bv_len;
+
+		bv_dma_address(bv) = DMA_MAPPING_ERROR;
+		bv_dma_len(bv) = 0;
+
+		if (bv_dma_is_bus_address(bv)) {
+			if (i > 0)
+				cur++;
+
+			bv_dma_unmark_bus_address(bv);
+			bv_dma_address(cur) = s_dma_addr;
+			bv_dma_len(cur) = s_length;
+			bv_dma_mark_bus_address(cur);
+			count++;
+			cur_len = 0;
+			continue;
+		}
+
+		bv->bv_offset += s_iova_off;
+		bv->bv_len = s_length;
+
+		/*
+		 * Now fill in the real DMA data. If...
+		 * - there is a valid output segment to append to
+		 * - and this segment starts on an IOVA page boundary
+		 * - but doesn't fall at a segment boundary
+		 * - and wouldn't make the resulting output segment too long
+		 */
+		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
+		    (max_len - cur_len >= s_length)) {
+			/* ...then concatenate it with the previous one */
+			cur_len += s_length;
+		} else {
+			/* Otherwise start the next output segment */
+			if (i > 0)
+				cur++;
+			cur_len = s_length;
+			count++;
+
+			bv_dma_address(cur) = dma_addr + s_iova_off;
+		}
+
+		bv_dma_len(cur) = cur_len;
+		dma_addr += s_iova_len;
+
+		if (s_length + s_iova_off < s_iova_len)
+			cur_len = 0;
+	}
+	return count;
+}
+
+/*
+ * If mapping failed, then just restore the original list,
+ * but making sure the DMA fields are invalidated.
+ */
+static void __invalidate_bvecs(struct bio_vec *bvecs, int nents)
+{
+	struct bio_vec *bv;
+	int i;
+
+	for (i = 0; i < nents; i++) {
+		bv = &bvecs[i];
+		if (bv_dma_is_bus_address(bv)) {
+			bv_dma_unmark_bus_address(bv);
+		} else {
+			if (bv_dma_address(bv) != DMA_MAPPING_ERROR)
+				bv->bv_offset += bv_dma_address(bv);
+			if (bv_dma_len(bv))
+				bv->bv_len = bv_dma_len(bv);
+		}
+		bv_dma_address(bv) = DMA_MAPPING_ERROR;
+		bv_dma_len(bv) = 0;
+	}
+}
+
+static void iommu_dma_unmap_bvecs_swiotlb(struct device *dev,
+		struct bio_vec *bvecs, int nents, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+	struct bio_vec *bv;
+	int i;
+
+	for (i = 0; i < nents; i++) {
+		bv = &bvecs[i];
+		iommu_dma_unmap_page(dev, bv_dma_address(bv),
+				     bv_dma_len(bv), dir, attrs);
+	}
+}
+
+static int iommu_dma_map_bvecs_swiotlb(struct device *dev, struct bio_vec *bvecs,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	struct bio_vec *bv;
+	int i;
+
+	bv_dma_mark_swiotlb(bvecs);
+
+	for (i = 0; i < nents; i++) {
+		bv = &bvecs[i];
+		bv_dma_address(bv) = iommu_dma_map_page(dev, bv->bv_page,
+				bv->bv_offset, bv->bv_len, dir, attrs);
+		if (bv_dma_address(bv) == DMA_MAPPING_ERROR)
+			goto out_unmap;
+		bv_dma_len(bv) = bv->bv_len;
+	}
+
+	return nents;
+
+out_unmap:
+	iommu_dma_unmap_bvecs_swiotlb(dev, bvecs, i, dir,
+				      attrs | DMA_ATTR_SKIP_CPU_SYNC);
+	return -EIO;
+}
+
+/*
+ * The DMA API client is passing in an array of bio_vecs which could
+ * describe any old buffer layout, but the IOMMU API requires everything
+ * to be aligned to IOMMU pages. Hence the need for this complicated bit
+ * of impedance-matching, to be able to hand off a suitably-aligned list,
+ * but still preserve the original offsets and sizes for the caller.
+ */
+static int iommu_dma_map_bvecs(struct device *dev, struct bio_vec *bvecs,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
+	struct iommu_domain *domain = iommu_get_dma_domain(dev);
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	unsigned long mask = dma_get_seg_boundary(dev);
+	struct iova_domain *iovad = &cookie->iovad;
+	struct bio_vec *prev = NULL;
+	size_t iova_len = 0;
+	dma_addr_t iova;
+	ssize_t ret;
+	int i;
+
+	if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
+		ret = iommu_deferred_attach(dev, domain);
+		if (ret)
+			goto out;
+	}
+
+	if (dev_use_bvecs_swiotlb(dev, bvecs, nents, dir))
+		return iommu_dma_map_bvecs_swiotlb(dev, bvecs, nents,
+						   dir, attrs);
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		iommu_dma_sync_bvecs_for_device(dev, bvecs, nents, dir);
+
+	/*
+	 * Work out how much IOVA space we need, and align the segments to
+	 * IOVA granules for the IOMMU driver to handle. With some clever
+	 * trickery we can modify the list in-place, but reversibly, by
+	 * stashing the unaligned parts in the as-yet-unused DMA fields.
+	 */
+	for (i = 0; i < nents; i++) {
+		struct bio_vec *bv = &bvecs[i];
+		size_t s_iova_off = iova_offset(iovad, bv->bv_offset);
+		size_t pad_len = (mask - iova_len + 1) & mask;
+		size_t s_length = bv->bv_len;
+
+		bv_dma_address(bv) = s_iova_off;
+		bv_dma_len(bv) = s_length;
+		bv->bv_offset -= s_iova_off;
+		s_length = iova_align(iovad, s_length + s_iova_off);
+		bv->bv_len = s_length;
+
+		/*
+		 * Due to the alignment of our single IOVA allocation, we can
+		 * depend on these assumptions about the segment boundary mask:
+		 * - If mask size >= IOVA size, then the IOVA range cannot
+		 *   possibly fall across a boundary, so we don't care.
+		 * - If mask size < IOVA size, then the IOVA range must start
+		 *   exactly on a boundary, therefore we can lay things out
+		 *   based purely on segment lengths without needing to know
+		 *   the actual addresses beforehand.
+		 * - The mask must be a power of 2, so pad_len == 0 if
+		 *   iova_len == 0, thus we cannot dereference prev the first
+		 *   time through here (i.e. before it has a meaningful value).
+		 */
+		if (pad_len && pad_len < s_length - 1) {
+			prev->bv_len += pad_len;
+			iova_len += pad_len;
+		}
+
+		iova_len += s_length;
+		prev = bv;
+	}
+
+	if (!iova_len)
+		return __finalise_bvecs(dev, bvecs, nents, 0);
+
+	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
+	if (!iova) {
+		ret = -ENOMEM;
+		goto out_restore_bvecs;
+	}
+
+	/*
+	 * We'll leave any physical concatenation to the IOMMU driver's
+	 * implementation - it knows better than we do.
+	 */
+	ret = iommu_map_bvecs(domain, iova, bvecs, nents, prot, GFP_ATOMIC);
+	if (ret < 0 || ret < iova_len)
+		goto out_free_iova;
+
+	return __finalise_bvecs(dev, bvecs, nents, iova);
+
+out_free_iova:
+	iommu_dma_free_iova(cookie, iova, iova_len, NULL);
+out_restore_bvecs:
+	__invalidate_bvecs(bvecs, nents);
+out:
+	if (ret != -ENOMEM && ret != -EREMOTEIO)
+		return -EINVAL;
+	return ret;
+}
+
+static void iommu_dma_unmap_bvecs(struct device *dev, struct bio_vec *bvecs,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	dma_addr_t end = 0, start;
+	struct bio_vec *bv;
+	int i;
+
+	if (bv_dma_is_swiotlb(bvecs)) {
+		iommu_dma_unmap_bvecs_swiotlb(dev, bvecs, nents, dir, attrs);
+		return;
+	}
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		iommu_dma_sync_bvecs_for_cpu(dev, bvecs, nents, dir);
+
+	/*
+	 * The bio_vec array elements are mapped into a single
+	 * contiguous IOVA allocation, the start and end points
+	 * just have to be determined.
+	 */
+	for (i = 0; i < nents; i++) {
+		bv = &bvecs[i];
+
+		if (bv_dma_is_bus_address(bv)) {
+			bv_dma_unmark_bus_address(bv);
+			continue;
+		}
+
+		if (bv_dma_len(bv) == 0)
+			break;
+
+		start = bv_dma_address(bv);
+		break;
+	}
+
+	bvecs += i;
+	nents -= i;
+	for (i = 0; i < nents; i++) {
+		bv = &bvecs[i];
+
+		if (bv_dma_is_bus_address(bv)) {
+			bv_dma_unmark_bus_address(bv);
+			continue;
+		}
+
+		if (bv_dma_len(bv) == 0)
+			break;
+
+		end = bv_dma_address(bv) + bv_dma_len(bv);
+	}
+
+	if (end)
+		__iommu_dma_unmap(dev, start, end - start);
+}
+
 static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
@@ -1613,10 +1977,14 @@ static const struct dma_map_ops iommu_dma_ops = {
 	.unmap_page		= iommu_dma_unmap_page,
 	.map_sg			= iommu_dma_map_sg,
 	.unmap_sg		= iommu_dma_unmap_sg,
+	.map_bvecs		= iommu_dma_map_bvecs,
+	.unmap_bvecs		= iommu_dma_unmap_bvecs,
 	.sync_single_for_cpu	= iommu_dma_sync_single_for_cpu,
 	.sync_single_for_device	= iommu_dma_sync_single_for_device,
 	.sync_sg_for_cpu	= iommu_dma_sync_sg_for_cpu,
 	.sync_sg_for_device	= iommu_dma_sync_sg_for_device,
+	.sync_bvecs_for_cpu	= iommu_dma_sync_bvecs_for_cpu,
+	.sync_bvecs_for_device	= iommu_dma_sync_bvecs_for_device,
 	.map_resource		= iommu_dma_map_resource,
 	.unmap_resource		= iommu_dma_unmap_resource,
 	.get_merge_boundary	= iommu_dma_get_merge_boundary,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3bfc56df4f78..a117917bf9d0 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2669,6 +2669,64 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_sg);
 
+ssize_t iommu_map_bvecs(struct iommu_domain *domain, unsigned long iova,
+			struct bio_vec *bv, unsigned int nents, int prot,
+			gfp_t gfp)
+{
+	const struct iommu_domain_ops *ops = domain->ops;
+	size_t len = 0, mapped = 0;
+	unsigned int i = 0;
+	phys_addr_t start;
+	int ret;
+
+	might_sleep_if(gfpflags_allow_blocking(gfp));
+
+	/* Discourage passing strange GFP flags */
+	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+				__GFP_HIGHMEM)))
+		return -EINVAL;
+
+	while (i <= nents) {
+		phys_addr_t b_phys = bv_phys(bv);
+
+		if (len && b_phys != start + len) {
+			ret = __iommu_map(domain, iova + mapped, start,
+					len, prot, gfp);
+
+			if (ret)
+				goto out_err;
+
+			mapped += len;
+			len = 0;
+		}
+
+		if (bv_dma_is_bus_address(bv))
+			goto next;
+
+		if (len) {
+			len += bv->bv_len;
+		} else {
+			len = bv->bv_len;
+			start = b_phys;
+		}
+
+next:
+		if (++i < nents)
+			bv++;
+	}
+
+	if (ops->iotlb_sync_map)
+		ops->iotlb_sync_map(domain, iova, mapped);
+	return mapped;
+
+out_err:
+	/* undo mappings already done */
+	iommu_unmap(domain, iova, mapped);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_map_bvecs);
+
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
  * @domain: the iommu domain where the fault has happened
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c50a769d569a..9f7120314fda 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -8,6 +8,7 @@
 #define __LINUX_IOMMU_H
 
 #include <linux/scatterlist.h>
+#include <linux/bvec.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -485,6 +486,9 @@ extern size_t iommu_unmap_fast(struct iommu_domain *domain,
 extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 			    struct scatterlist *sg, unsigned int nents,
 			    int prot, gfp_t gfp);
+extern ssize_t iommu_map_bvecs(struct iommu_domain *domain, unsigned long iova,
+			       struct bio_vec *bvecs, unsigned int nents,
+			       int prot, gfp_t gfp);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
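
As an aside for reviewers of the series as a whole, a rough, hypothetical
sketch of how a ULP might reach the new .map_bvecs/.sync_bvecs_* callbacks
through the DMA API wrappers added earlier in the series
(dma_map_bvecs_attrs() in patch 7/9, dma_sync_bvecs_for_cpu() and
dma_sync_bvecs_for_device() in patch 6/9). The exact wrapper signatures, and
the unmap counterpart, are assumptions modeled on the scatterlist
equivalents, not something defined in this patch.

	/* bvecs/nents describe a receive buffer; dev is the DMA device */
	int count;

	/* assumed signature, modeled on dma_map_sg_attrs() */
	count = dma_map_bvecs_attrs(dev, bvecs, nents, DMA_FROM_DEVICE, 0);
	if (count <= 0)
		return -EIO;

	/* ... post bv_dma_address()/bv_dma_len() of each element to the device ... */

	/* make device-written data visible to the CPU while keeping the mapping */
	dma_sync_bvecs_for_cpu(dev, bvecs, nents, DMA_FROM_DEVICE);

	/* assumed unmap counterpart to dma_map_bvecs_attrs() */
	dma_unmap_bvecs_attrs(dev, bvecs, nents, DMA_FROM_DEVICE, 0);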



Thread overview: 35+ messages
2023-10-19 15:25 [PATCH RFC 0/9] Exploring biovec support in (R)DMA API Chuck Lever
2023-10-19 15:25 ` Chuck Lever
2023-10-19 15:25 ` [PATCH RFC 1/9] dma-debug: Fix a typo in a debugging eye-catcher Chuck Lever
2023-10-20  4:49   ` Christoph Hellwig
2023-10-20 13:38     ` Chuck Lever III
2023-10-23  5:56       ` Christoph Hellwig
2023-10-19 15:25 ` [PATCH RFC 2/9] bvec: Add bio_vec fields to manage DMA mapping Chuck Lever
2023-10-19 15:25   ` Chuck Lever
2023-10-19 15:25 ` [PATCH RFC 3/9] dma-debug: Add dma_debug_ helpers for mapping bio_vec arrays Chuck Lever
2023-10-19 15:25   ` Chuck Lever
2023-10-19 21:38   ` kernel test robot
2023-10-19 23:21     ` Chuck Lever III
2023-10-23  2:43       ` Liu, Yujie
2023-10-23 14:27         ` Chuck Lever III
2023-10-19 21:49   ` kernel test robot
2023-10-19 15:25 ` [PATCH RFC 4/9] mm: kmsan: Add support for DMA " Chuck Lever
2023-10-19 15:25   ` Chuck Lever
2023-10-19 15:26 ` [PATCH RFC 5/9] dma-direct: Support direct " Chuck Lever
2023-10-19 15:26   ` Chuck Lever
2023-10-19 15:26 ` [PATCH RFC 6/9] DMA-API: Add dma_sync_bvecs_for_cpu() and dma_sync_bvecs_for_device() Chuck Lever
2023-10-19 15:26   ` Chuck Lever
2023-10-19 15:26 ` [PATCH RFC 7/9] DMA: Add dma_map_bvecs_attrs() Chuck Lever
2023-10-19 15:26   ` Chuck Lever
2023-10-19 22:10   ` kernel test robot
2023-10-19 15:26 ` Chuck Lever [this message]
2023-10-19 15:26   ` [PATCH RFC 8/9] iommu/dma: Support DMA-mapping a bio_vec array Chuck Lever
2023-10-19 15:26 ` [PATCH RFC 9/9] RDMA: Add helpers for DMA-mapping an array of bio_vecs Chuck Lever
2023-10-19 15:26   ` Chuck Lever
2023-10-19 15:53 ` [PATCH RFC 0/9] Exploring biovec support in (R)DMA API Matthew Wilcox
2023-10-19 17:48   ` Chuck Lever
2023-10-20  4:58   ` Christoph Hellwig
2023-10-20 10:30     ` Robin Murphy
2023-10-23  5:59       ` Christoph Hellwig
2023-10-19 16:43 ` Robin Murphy
2023-10-19 17:53   ` Jason Gunthorpe
