All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yi Liu <yi.l.liu@intel.com>
To: joro@8bytes.org, alex.williamson@redhat.com, jgg@nvidia.com,
	kevin.tian@intel.com, robin.murphy@arm.com
Cc: cohuck@redhat.com, eric.auger@redhat.com, nicolinc@nvidia.com,
	kvm@vger.kernel.org, mjrosato@linux.ibm.com,
	chao.p.peng@linux.intel.com, yi.l.liu@intel.com,
	yi.y.sun@linux.intel.com, peterx@redhat.com, jasowang@redhat.com,
	shameerali.kolothum.thodi@huawei.com, lulu@redhat.com,
	suravee.suthikulpanit@amd.com, iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org,
	baolu.lu@linux.intel.com
Subject: [PATCH 09/17] iommufd: Add kernel-managed hw_pagetable allocation for userspace
Date: Wed,  8 Feb 2023 20:31:45 -0800	[thread overview]
Message-ID: <20230209043153.14964-10-yi.l.liu@intel.com> (raw)
In-Reply-To: <20230209043153.14964-1-yi.l.liu@intel.com>

Today iommufd allocates the kernel-managed hw_pagetable implicitly when a
device is attached to an IOAS. This links the hw_pagetable to the IOPT within
the IOAS.

However, this is not ideal. It makes more sense to let userspace explicitly
request hw_pagetable allocation via iommufd, because even though the
hw_pagetable is kernel-managed, its mappings are fed by userspace. This also
makes the lifecycle of the kernel-managed hw_pagetable clearer during usage.
That is very important for nested translation, in which the kernel-managed
hw_pagetable is used as the stage-2 hw_pagetable. In that case, both the
stage-1 and stage-2 hw_pagetables should be allocated by userspace to ensure
a well-defined lifecycle.

This adds an ioctl IOMMU_HWPT_ALLOC for the hw_pagetable allocation. For
kernel-managed hw_pagetable, userspace should provide an IOAS ID in the
allocation request.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/iommufd/device.c          |  11 ++-
 drivers/iommu/iommufd/hw_pagetable.c    | 116 ++++++++++++++++++++++++
 drivers/iommu/iommufd/iommufd_private.h |  15 +++
 drivers/iommu/iommufd/main.c            |   3 +
 include/uapi/linux/iommufd.h            |  48 ++++++++++
 5 files changed, 191 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 71a8c4f1c4a9..dd7943ff02e4 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -146,8 +146,7 @@ static int iommufd_zero_fill_user(u64 ptr, int bytes)
 	return 0;
 }
 
-static struct device *
-iommufd_obj_dev(struct iommufd_object *obj)
+struct device *iommufd_obj_dev(struct iommufd_object *obj)
 {
 	struct device *dev = NULL;
 
@@ -160,6 +159,14 @@ iommufd_obj_dev(struct iommufd_object *obj)
 	return dev;
 }
 
+/*
+ * Bitmaps of the page table data types supported by the hardware IOMMU,
+ * indexed by the members of enum iommu_device_data_type.
+ */
+const u64 iommufd_supported_pgtbl_types[] =  {
+	[IOMMU_DEVICE_DATA_INTEL_VTD] = BIT_ULL(IOMMU_PGTBL_DATA_NONE),
+};
+
 int iommufd_device_get_info(struct iommufd_ucmd *ucmd)
 {
 	struct iommu_device_info *cmd = ucmd->cmd;
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index ee97d2f3cf43..998d01490a74 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -92,3 +92,119 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 {
 	return __iommufd_hw_pagetable_alloc(ictx, ioas, dev, NULL, NULL);
 }
+
+/*
+ * size of page table type specific data, indexed by
+ * enum iommu_pgtbl_data_type.
+ */
+static const size_t iommufd_hwpt_info_size[] = {
+	[IOMMU_PGTBL_DATA_NONE] = 0,
+};
+
+/*
+ * IOMMU_HWPT_ALLOC handler: allocate a hw_pagetable for device @dev_id on the
+ * IOAS @pt_id and report its ID in @out_hwpt_id. Returns 0 or -errno.
+ */
+int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	struct iommu_hwpt_alloc *cmd = ucmd->cmd;
+	struct iommufd_ctx *ictx = ucmd->ictx;
+	struct iommufd_object *pt_obj = NULL;
+	struct iommufd_ioas *ioas = NULL;
+	struct iommufd_object *dev_obj;
+	struct device *dev;
+	const struct iommu_ops *ops;
+	void *data = NULL;
+	u32 klen;
+	int rc;
+
+	if (cmd->__reserved || cmd->flags)
+		return -EOPNOTSUPP;
+
+	dev_obj = iommufd_get_object(ictx, cmd->dev_id, IOMMUFD_OBJ_ANY);
+	if (IS_ERR(dev_obj))
+		return PTR_ERR(dev_obj);
+
+	dev = iommufd_obj_dev(dev_obj);
+	if (!dev) {
+		rc = -EINVAL;
+		goto out_put_dev;
+	}
+
+	ops = dev_iommu_ops(dev);
+	if (!ops) {
+		rc = -EOPNOTSUPP;
+		goto out_put_dev;
+	}
+
+	/* data_type is user controlled: bound it before shift/index use */
+	if (cmd->data_type >= ARRAY_SIZE(iommufd_hwpt_info_size) ||
+	    !(BIT_ULL(cmd->data_type) &
+	      iommufd_supported_pgtbl_types[ops->driver_type])) {
+		rc = -EINVAL;
+		goto out_put_dev;
+	}
+
+	pt_obj = iommufd_get_object(ictx, cmd->pt_id, IOMMUFD_OBJ_ANY);
+	if (IS_ERR(pt_obj)) {
+		rc = -EINVAL;
+		goto out_put_dev;
+	}
+
+	switch (pt_obj->type) {
+	case IOMMUFD_OBJ_IOAS:
+		ioas = container_of(pt_obj, struct iommufd_ioas, obj);
+		break;
+	default:
+		rc = -EINVAL;
+		goto out_put_pt;
+	}
+
+	klen = iommufd_hwpt_info_size[cmd->data_type];
+	if (klen) {
+		if (!cmd->data_len) {
+			rc = -EINVAL;
+			goto out_put_pt;
+		}
+
+		data = kzalloc(klen, GFP_KERNEL);
+		if (!data) {
+			rc = -ENOMEM;
+			goto out_put_pt;
+		}
+
+		rc = copy_struct_from_user(data, klen,
+					   u64_to_user_ptr(cmd->data_uptr),
+					   cmd->data_len);
+		if (rc)
+			goto out_free_data;
+	}
+
+	mutex_lock(&ioas->mutex);
+	hwpt = __iommufd_hw_pagetable_alloc(ictx, ioas, dev, NULL, data);
+	mutex_unlock(&ioas->mutex);
+	if (IS_ERR(hwpt)) {
+		rc = PTR_ERR(hwpt);
+		goto out_free_data;
+	}
+
+	cmd->out_hwpt_id = hwpt->obj.id;
+
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+	if (rc)
+		goto out_destroy_hwpt;
+
+	/* rc is 0 here; success shares the unwind of data and object refs */
+	iommufd_object_finalize(ictx, &hwpt->obj);
+	goto out_free_data;
+out_destroy_hwpt:
+	iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
+out_free_data:
+	kfree(data);
+out_put_pt:
+	iommufd_put_object(pt_obj);
+out_put_dev:
+	iommufd_put_object(dev_obj);
+	return rc;
+}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 604ad29f87b8..ee5344baf135 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -257,7 +257,22 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 			   struct device *dev);
 void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
 
+static inline struct iommufd_hw_pagetable *
+iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
+{
+	return container_of(iommufd_get_object(ucmd->ictx, id,
+					       IOMMUFD_OBJ_HW_PAGETABLE),
+			    struct iommufd_hw_pagetable, obj);
+}
+
+int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
+
+struct device *iommufd_obj_dev(struct iommufd_object *obj);
+
 void iommufd_device_destroy(struct iommufd_object *obj);
+
+extern const u64 iommufd_supported_pgtbl_types[];
+
 int iommufd_device_get_info(struct iommufd_ucmd *ucmd);
 
 struct iommufd_access {
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 59aa30ad1090..831303d64abe 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -251,6 +251,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd)
 union ucmd_buffer {
 	struct iommu_destroy destroy;
 	struct iommu_device_info info;
+	struct iommu_hwpt_alloc hwpt;
 	struct iommu_ioas_alloc alloc;
 	struct iommu_ioas_allow_iovas allow_iovas;
 	struct iommu_ioas_copy ioas_copy;
@@ -284,6 +285,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 	IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
 	IOCTL_OP(IOMMU_DEVICE_GET_INFO, iommufd_device_get_info, struct iommu_device_info,
 		 __reserved),
+	IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
+		 __reserved),
 	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
 		 struct iommu_ioas_alloc, out_ioas_id),
 	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index bbffb63d2513..f501add5ffe9 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -46,6 +46,7 @@ enum {
 	IOMMUFD_CMD_OPTION,
 	IOMMUFD_CMD_VFIO_IOAS,
 	IOMMUFD_CMD_DEVICE_GET_INFO,
+	IOMMUFD_CMD_HWPT_ALLOC,
 };
 
 /**
@@ -373,6 +374,14 @@ struct iommu_device_info_vtd {
 	__aligned_u64 ecap_reg;
 };
 
+/**
+ * enum iommu_pgtbl_data_type - IOMMU Page Table User Data type
+ * @IOMMU_PGTBL_DATA_NONE: no user data
+ */
+enum iommu_pgtbl_data_type {
+	IOMMU_PGTBL_DATA_NONE,
+};
+
 /**
  * struct iommu_device_info - ioctl(IOMMU_DEVICE_GET_INFO)
  * @size: sizeof(struct iommu_device_info)
@@ -461,6 +470,45 @@ struct iommu_hwpt_intel_vtd {
 	__u32 __reserved;
 };
 
+/**
+ * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
+ * @size: sizeof(struct iommu_hwpt_alloc)
+ * @flags: Must be 0
+ * @dev_id: The device to allocate this HWPT for
+ * @pt_id: The parent of this HWPT (IOAS or HWPT)
+ * @data_type: One of enum iommu_pgtbl_data_type
+ * @data_len: Length of the type specific data
+ * @data_uptr: User pointer to the type specific data
+ * @out_hwpt_id: Output HWPT ID for the allocated object
+ * @__reserved: Must be 0
+ *
+ * Allocate a hw_pagetable for managing page tables in userspace. Such page
+ * tables can be user-managed or kernel-managed. @pt_id is needed in either
+ * case, while @data_type, @data_len and @data_uptr are optional. For
+ * user-managed page tables, userspace should provide the data_type, the
+ * data_len and the type specific data. For kernel-managed page tables, use
+ * the IOMMU_PGTBL_DATA_NONE data_type; @data_len and @data_uptr will then be
+ * ignored.
+ *
+ * +==============================+=====================================+
+ * | @data_type                   |      Data structure in @data_uptr   |
+ * +------------------------------+-------------------------------------+
+ * | IOMMU_PGTBL_DATA_NONE        |                 N/A                 |
+ * +------------------------------+-------------------------------------+
+ */
+struct iommu_hwpt_alloc {
+	__u32 size;
+	__u32 flags;
+	__u32 dev_id;
+	__u32 pt_id;
+	__u32 data_type;
+	__u32 data_len;
+	__aligned_u64 data_uptr;
+	__u32 out_hwpt_id;
+	__u32 __reserved;
+};
+#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
+
 /**
  * enum iommu_vtd_qi_granularity - Intel VT-d specific granularity of
  *				   queued invalidation
-- 
2.34.1


  parent reply	other threads:[~2023-02-09  4:35 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-09  4:31 [PATCH 00/17] Add Intel VT-d nested translation Yi Liu
2023-02-09  4:31 ` [PATCH 01/17] iommu: Add new iommu op to create domains owned by userspace Yi Liu
2023-02-10  8:24   ` Tian, Kevin
2023-02-11  3:16     ` Baolu Lu
2023-02-09  4:31 ` [PATCH 02/17] iommu: Add nested domain support Yi Liu
2023-02-14 18:47   ` Nicolin Chen
2023-02-09  4:31 ` [PATCH 03/17] iommu/vt-d: Extend dmar_domain to support nested domain Yi Liu
2023-02-09  4:31 ` [PATCH 04/17] iommu/vt-d: Add helper to setup pasid nested translation Yi Liu
2023-02-09  4:31 ` [PATCH 05/17] iommu/vt-d: Add nested domain support Yi Liu
2023-02-09  4:31 ` [PATCH 06/17] iommufd/hw_pagetable: Use domain_alloc_user op for domain allocation Yi Liu
2023-02-09 17:59   ` Matthew Rosato
2023-02-09 18:36     ` Jason Gunthorpe
2023-02-09 19:51       ` Nicolin Chen
2023-02-09 20:39         ` Jason Gunthorpe
2023-02-09 22:22           ` Nicolin Chen
2023-02-09 23:59             ` Jason Gunthorpe
2023-02-10 10:50             ` Liu, Yi L
2023-02-09  4:31 ` [PATCH 07/17] iommufd: Add/del hwpt to IOAS at alloc/destroy() Yi Liu
2023-02-09  4:31 ` [PATCH 08/17] iommufd: Split iommufd_hw_pagetable_alloc() Yi Liu
2023-02-09  4:31 ` Yi Liu [this message]
2023-02-09 20:45   ` [PATCH 09/17] iommufd: Add kernel-managed hw_pagetable allocation for userspace Jason Gunthorpe
2023-02-10 10:52     ` Liu, Yi L
2023-02-09  4:31 ` [PATCH 10/17] iommufd/device: Move IOAS attaching and detaching operations into helpers Yi Liu
2023-02-09  4:31 ` [PATCH 11/17] iommufd: Add infrastructure for user-managed hw_pagetable allocation Yi Liu
2023-02-09  4:31 ` [PATCH 12/17] iommufd: Add " Yi Liu
2023-02-09  4:31 ` [PATCH 13/17] iommufd/device: Report supported stage-1 page table types Yi Liu
2023-02-09  4:31 ` [PATCH 14/17] iommufd/selftest: Add IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE test op Yi Liu
2023-02-14 18:36   ` Nicolin Chen
2023-02-09  4:31 ` [PATCH 15/17] iommufd/selftest: Add coverage for IOMMU_HWPT_ALLOC ioctl Yi Liu
2023-02-09  4:31 ` [PATCH 16/17] iommufd/selftest: Add IOMMU_TEST_OP_MD_CHECK_IOTLB test op Yi Liu
2023-02-09  4:31 ` [PATCH 17/17] iommufd/selftest: Add coverage for IOMMU_HWPT_INVALIDATE ioctl Yi Liu
2023-02-09 10:11 ` [PATCH 00/17] Add Intel VT-d nested translation Shameerali Kolothum Thodi
2023-02-09 16:10   ` Nicolin Chen
2023-02-09 16:16     ` Shameerali Kolothum Thodi
2023-02-17 18:20 ` Nicolin Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230209043153.14964-10-yi.l.liu@intel.com \
    --to=yi.l.liu@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=cohuck@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=lulu@redhat.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=robin.murphy@arm.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=yi.y.sun@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.