All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yishai Hadas <yishaih@nvidia.com>
To: <alex.williamson@redhat.com>, <jgg@nvidia.com>
Cc: <saeedm@nvidia.com>, <kvm@vger.kernel.org>,
	<netdev@vger.kernel.org>, <kuba@kernel.org>,
	<kevin.tian@intel.com>, <joao.m.martins@oracle.com>,
	<leonro@nvidia.com>, <yishaih@nvidia.com>, <maorg@nvidia.com>,
	<cohuck@redhat.com>
Subject: [PATCH V2 vfio 05/11] vfio: Add an IOVA bitmap support
Date: Thu, 14 Jul 2022 11:12:45 +0300	[thread overview]
Message-ID: <20220714081251.240584-6-yishaih@nvidia.com> (raw)
In-Reply-To: <20220714081251.240584-1-yishaih@nvidia.com>

From: Joao Martins <joao.m.martins@oracle.com>

The new facility adds a bunch of wrappers that abstract how an IOVA
range is represented in a bitmap that is granulated by a given
page_size. So it translates all the lifting of dealing with user
pointers into its corresponding kernel addresses backing said user
memory into doing finally the bitmap ops to change various bits.

The formula for the bitmap is:

   data[(iova / page_size) / 64] & (1ULL << (iova % 64))

Where 64 is the number of bits in a unsigned long (depending on arch)

An example usage of these helpers for a given @iova, @page_size, @length
and __user @data:

	iova_bitmap_init(&iter.dirty, iova, __ffs(page_size));
	ret = iova_bitmap_iter_init(&iter, iova, length, data);
	if (ret)
		return -ENOMEM;

	for (; !iova_bitmap_iter_done(&iter);
	     iova_bitmap_iter_advance(&iter)) {
		ret = iova_bitmap_iter_get(&iter);
		if (ret)
			break;
		if (dirty)
			iova_bitmap_set(iova_bitmap_iova(&iter),
					iova_bitmap_iova_length(&iter),
					&iter.dirty);

		iova_bitmap_iter_put(&iter);

		if (ret)
			break;
	}

	iova_bitmap_iter_free(&iter);

The facility is intended to be used for user bitmaps representing
dirtied IOVAs by IOMMU (via IOMMUFD) and PCI Devices (via vfio-pci).

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/Makefile       |   6 +-
 drivers/vfio/iova_bitmap.c  | 164 ++++++++++++++++++++++++++++++++++++
 include/linux/iova_bitmap.h |  46 ++++++++++
 3 files changed, 214 insertions(+), 2 deletions(-)
 create mode 100644 drivers/vfio/iova_bitmap.c
 create mode 100644 include/linux/iova_bitmap.h

diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 1a32357592e3..1d6cad32d366 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,9 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 vfio_virqfd-y := virqfd.o
 
-vfio-y += vfio_main.o
-
 obj-$(CONFIG_VFIO) += vfio.o
+
+vfio-y := vfio_main.o \
+          iova_bitmap.o \
+
 obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c
new file mode 100644
index 000000000000..9ad1533a6aec
--- /dev/null
+++ b/drivers/vfio/iova_bitmap.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/iova_bitmap.h>
+
+static unsigned long iova_bitmap_iova_to_index(struct iova_bitmap_iter *iter,
+					       unsigned long iova_length)
+{
+	unsigned long pgsize = 1 << iter->dirty.pgshift;
+
+	return DIV_ROUND_UP(iova_length, BITS_PER_TYPE(*iter->data) * pgsize);
+}
+
+static unsigned long iova_bitmap_index_to_iova(struct iova_bitmap_iter *iter,
+					       unsigned long index)
+{
+	unsigned long pgshift = iter->dirty.pgshift;
+
+	return (index * sizeof(*iter->data) * BITS_PER_BYTE) << pgshift;
+}
+
+static unsigned long iova_bitmap_iter_left(struct iova_bitmap_iter *iter)
+{
+	unsigned long left = iter->count - iter->offset;
+
+	left = min_t(unsigned long, left,
+		     (iter->dirty.npages << PAGE_SHIFT) / sizeof(*iter->data));
+
+	return left;
+}
+
+/*
+ * Input argument of number of bits to bitmap_set() is unsigned integer, which
+ * further casts to signed integer for unaligned multi-bit operation,
+ * __bitmap_set().
+ * Then maximum bitmap size supported is 2^31 bits divided by 2^3 bits/byte,
+ * that is 2^28 (256 MB) which maps to 2^31 * 2^12 = 2^43 (8TB) on 4K page
+ * system.
+ */
+int iova_bitmap_iter_init(struct iova_bitmap_iter *iter,
+			  unsigned long iova, unsigned long length,
+			  u64 __user *data)
+{
+	struct iova_bitmap *dirty = &iter->dirty;
+
+	iter->data = data;
+	iter->offset = 0;
+	iter->count = iova_bitmap_iova_to_index(iter, length);
+	iter->iova = iova;
+	iter->length = length;
+	dirty->pages = (struct page **)__get_free_page(GFP_KERNEL);
+
+	return !dirty->pages ? -ENOMEM : 0;
+}
+
+void iova_bitmap_iter_free(struct iova_bitmap_iter *iter)
+{
+	struct iova_bitmap *dirty = &iter->dirty;
+
+	if (dirty->pages) {
+		free_page((unsigned long)dirty->pages);
+		dirty->pages = NULL;
+	}
+}
+
+bool iova_bitmap_iter_done(struct iova_bitmap_iter *iter)
+{
+	return iter->offset >= iter->count;
+}
+
+unsigned long iova_bitmap_length(struct iova_bitmap_iter *iter)
+{
+	unsigned long max_iova = iter->dirty.iova + iter->length;
+	unsigned long left = iova_bitmap_iter_left(iter);
+	unsigned long iova = iova_bitmap_iova(iter);
+
+	left = iova_bitmap_index_to_iova(iter, left);
+	if (iova + left > max_iova)
+		left -= ((iova + left) - max_iova);
+
+	return left;
+}
+
+unsigned long iova_bitmap_iova(struct iova_bitmap_iter *iter)
+{
+	unsigned long skip = iter->offset;
+
+	return iter->iova + iova_bitmap_index_to_iova(iter, skip);
+}
+
+void iova_bitmap_iter_advance(struct iova_bitmap_iter *iter)
+{
+	unsigned long length = iova_bitmap_length(iter);
+
+	iter->offset += iova_bitmap_iova_to_index(iter, length);
+}
+
+void iova_bitmap_iter_put(struct iova_bitmap_iter *iter)
+{
+	struct iova_bitmap *dirty = &iter->dirty;
+
+	if (dirty->npages)
+		unpin_user_pages(dirty->pages, dirty->npages);
+}
+
+int iova_bitmap_iter_get(struct iova_bitmap_iter *iter)
+{
+	struct iova_bitmap *dirty = &iter->dirty;
+	unsigned long npages;
+	u64 __user *addr;
+	long ret;
+
+	npages = DIV_ROUND_UP((iter->count - iter->offset) *
+			      sizeof(*iter->data), PAGE_SIZE);
+	npages = min(npages,  PAGE_SIZE / sizeof(struct page *));
+	addr = iter->data + iter->offset;
+	ret = pin_user_pages_fast((unsigned long)addr, npages,
+				  FOLL_WRITE, dirty->pages);
+	if (ret <= 0)
+		return ret;
+
+	dirty->npages = (unsigned long)ret;
+	dirty->iova = iova_bitmap_iova(iter);
+	dirty->start_offset = offset_in_page(addr);
+	return 0;
+}
+
+void iova_bitmap_init(struct iova_bitmap *bitmap,
+		      unsigned long base, unsigned long pgshift)
+{
+	memset(bitmap, 0, sizeof(*bitmap));
+	bitmap->iova = base;
+	bitmap->pgshift = pgshift;
+}
+
+unsigned int iova_bitmap_set(struct iova_bitmap *dirty,
+			     unsigned long iova,
+			     unsigned long length)
+{
+	unsigned long nbits, offset, start_offset, idx, size, *kaddr;
+
+	nbits = max(1UL, length >> dirty->pgshift);
+	offset = (iova - dirty->iova) >> dirty->pgshift;
+	idx = offset / (PAGE_SIZE * BITS_PER_BYTE);
+	offset = offset % (PAGE_SIZE * BITS_PER_BYTE);
+	start_offset = dirty->start_offset;
+
+	while (nbits > 0) {
+		kaddr = kmap_local_page(dirty->pages[idx]) + start_offset;
+		size = min(PAGE_SIZE * BITS_PER_BYTE - offset, nbits);
+		bitmap_set(kaddr, offset, size);
+		kunmap_local(kaddr - start_offset);
+		start_offset = offset = 0;
+		nbits -= size;
+		idx++;
+	}
+
+	return nbits;
+}
+EXPORT_SYMBOL_GPL(iova_bitmap_set);
+
diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h
new file mode 100644
index 000000000000..c474c351634a
--- /dev/null
+++ b/include/linux/iova_bitmap.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef _IOVA_BITMAP_H_
+#define _IOVA_BITMAP_H_
+
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+
+struct iova_bitmap {
+	unsigned long iova;
+	unsigned long pgshift;
+	unsigned long start_offset;
+	unsigned long npages;
+	struct page **pages;
+};
+
+struct iova_bitmap_iter {
+	struct iova_bitmap dirty;
+	u64 __user *data;
+	size_t offset;
+	size_t count;
+	unsigned long iova;
+	unsigned long length;
+};
+
+int iova_bitmap_iter_init(struct iova_bitmap_iter *iter, unsigned long iova,
+			  unsigned long length, u64 __user *data);
+void iova_bitmap_iter_free(struct iova_bitmap_iter *iter);
+bool iova_bitmap_iter_done(struct iova_bitmap_iter *iter);
+unsigned long iova_bitmap_length(struct iova_bitmap_iter *iter);
+unsigned long iova_bitmap_iova(struct iova_bitmap_iter *iter);
+void iova_bitmap_iter_advance(struct iova_bitmap_iter *iter);
+int iova_bitmap_iter_get(struct iova_bitmap_iter *iter);
+void iova_bitmap_iter_put(struct iova_bitmap_iter *iter);
+void iova_bitmap_init(struct iova_bitmap *bitmap,
+		      unsigned long base, unsigned long pgshift);
+unsigned int iova_bitmap_set(struct iova_bitmap *dirty,
+			     unsigned long iova,
+			     unsigned long length);
+
+#endif
-- 
2.18.1


  parent reply	other threads:[~2022-07-14  8:15 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-14  8:12 [PATCH V2 vfio 00/11] Add device DMA logging support for mlx5 driver Yishai Hadas
2022-07-14  8:12 ` [PATCH V2 vfio 01/11] net/mlx5: Introduce ifc bits for page tracker Yishai Hadas
2022-07-21  8:28   ` Tian, Kevin
2022-07-21  8:43     ` Yishai Hadas
2022-07-14  8:12 ` [PATCH V2 vfio 02/11] net/mlx5: Query ADV_VIRTUALIZATION capabilities Yishai Hadas
2022-07-14  8:12 ` [PATCH V2 vfio 03/11] vfio: Introduce DMA logging uAPIs Yishai Hadas
2022-07-18 22:29   ` Alex Williamson
2022-07-19  1:39     ` Tian, Kevin
2022-07-19  5:40       ` Kirti Wankhede
2022-07-19  7:49     ` Yishai Hadas
2022-07-19 19:57       ` Alex Williamson
2022-07-19 20:18         ` Jason Gunthorpe
2022-07-21  8:45   ` Tian, Kevin
2022-07-21 12:05     ` Jason Gunthorpe
2022-07-25  7:20       ` Tian, Kevin
2022-07-25 14:33         ` Jason Gunthorpe
2022-07-26  7:07           ` Tian, Kevin
     [not found]     ` <56bd06d3-944c-18da-86ed-ae14ce5940b7@nvidia.com>
2022-07-25  7:30       ` Tian, Kevin
2022-07-26  8:37         ` Yishai Hadas
2022-07-26 14:03           ` Alex Williamson
2022-07-26 15:04             ` Jason Gunthorpe
2022-07-28  4:05               ` Tian, Kevin
2022-07-28 12:06                 ` Jason Gunthorpe
2022-07-29  3:01                   ` Tian, Kevin
2022-07-29 14:11                     ` Jason Gunthorpe
2022-07-14  8:12 ` [PATCH V2 vfio 04/11] vfio: Move vfio.c to vfio_main.c Yishai Hadas
2022-07-14  8:12 ` Yishai Hadas [this message]
2022-07-18 22:30   ` [PATCH V2 vfio 05/11] vfio: Add an IOVA bitmap support Alex Williamson
2022-07-18 22:46     ` Jason Gunthorpe
2022-07-19 19:01   ` Alex Williamson
2022-07-20  1:57     ` Joao Martins
2022-07-20 16:47       ` Alex Williamson
2022-07-20 17:27         ` Jason Gunthorpe
2022-07-20 18:16         ` Joao Martins
2022-07-14  8:12 ` [PATCH V2 vfio 06/11] vfio: Introduce the DMA logging feature support Yishai Hadas
2022-07-18 22:30   ` Alex Williamson
2022-07-19  9:19     ` Yishai Hadas
2022-07-19 19:25       ` Alex Williamson
2022-07-19 20:08         ` Jason Gunthorpe
2022-07-21  8:54           ` Tian, Kevin
2022-07-21 11:50             ` Jason Gunthorpe
2022-07-25  7:38               ` Tian, Kevin
2022-07-25 14:37                 ` Jason Gunthorpe
2022-07-26  7:34                   ` Tian, Kevin
2022-07-26 15:12                     ` Jason Gunthorpe
2022-07-14  8:12 ` [PATCH V2 vfio 07/11] vfio/mlx5: Init QP based resources for dirty tracking Yishai Hadas
2022-07-14  8:12 ` [PATCH V2 vfio 08/11] vfio/mlx5: Create and destroy page tracker object Yishai Hadas
2022-07-14  8:12 ` [PATCH V2 vfio 09/11] vfio/mlx5: Report dirty pages from tracker Yishai Hadas
2022-07-14  8:12 ` [PATCH V2 vfio 10/11] vfio/mlx5: Manage error scenarios on tracker Yishai Hadas
2022-07-14  8:12 ` [PATCH V2 vfio 11/11] vfio/mlx5: Set the driver DMA logging callbacks Yishai Hadas
2022-07-21  8:26 ` [PATCH V2 vfio 00/11] Add device DMA logging support for mlx5 driver Tian, Kevin
2022-07-21  8:55   ` Yishai Hadas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220714081251.240584-6-yishaih@nvidia.com \
    --to=yishaih@nvidia.com \
    --cc=alex.williamson@redhat.com \
    --cc=cohuck@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=kuba@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=leonro@nvidia.com \
    --cc=maorg@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.