All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yi Liu <yi.l.liu@intel.com>
To: joro@8bytes.org, alex.williamson@redhat.com, jgg@nvidia.com,
	kevin.tian@intel.com, robin.murphy@arm.com
Cc: cohuck@redhat.com, eric.auger@redhat.com, nicolinc@nvidia.com,
	kvm@vger.kernel.org, mjrosato@linux.ibm.com,
	chao.p.peng@linux.intel.com, yi.l.liu@intel.com,
	yi.y.sun@linux.intel.com, peterx@redhat.com, jasowang@redhat.com,
	shameerali.kolothum.thodi@huawei.com, lulu@redhat.com,
	suravee.suthikulpanit@amd.com, iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org,
	baolu.lu@linux.intel.com
Subject: [PATCH 12/17] iommufd: Add user-managed hw_pagetable allocation
Date: Wed,  8 Feb 2023 20:31:48 -0800	[thread overview]
Message-ID: <20230209043153.14964-13-yi.l.liu@intel.com> (raw)
In-Reply-To: <20230209043153.14964-1-yi.l.liu@intel.com>

As the introduction of nested translation, there are page tables managed
by userspace. hw_pagetables can be stage-1 pagetable, stage-2 pagetable or
just standalone pagetable.

Stage-2 page table and standalone pagetable are kernel-managed for security.
iommufd has already supported it.

Stage-1 pagetable is user-managed and needs to work with a stage-2 page table.
Hence, userspace should provide a hw_pagetable ID that points to a stage-2
hw_pagetable. Since stage-1 is user-managed, so an ioctl is added to sync
the IOTLB when there is modification in the stage-1 page table.

The first available user-managed hw_pagtable type is the Intel VT-d stage-1
pagetable for nested translation.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/iommufd/device.c          |  3 +-
 drivers/iommu/iommufd/hw_pagetable.c    | 71 ++++++++++++++++++++++++-
 drivers/iommu/iommufd/iommufd_private.h |  1 +
 drivers/iommu/iommufd/main.c            |  8 +++
 include/uapi/linux/iommufd.h            | 34 ++++++++++++
 5 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 6d948fa418d5..c19e2f54a44f 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -164,7 +164,8 @@ struct device *iommufd_obj_dev(struct iommufd_object *obj)
  * indexed by the members defined in enum iommu_device_data_type.
  */
 const u64 iommufd_supported_pgtbl_types[] =  {
-	[IOMMU_DEVICE_DATA_INTEL_VTD] = BIT_ULL(IOMMU_PGTBL_DATA_NONE),
+	[IOMMU_DEVICE_DATA_INTEL_VTD] = BIT_ULL(IOMMU_PGTBL_DATA_NONE) |
+					BIT_ULL(IOMMU_PGTBL_DATA_VTD_S1),
 };
 
 int iommufd_device_get_info(struct iommufd_ucmd *ucmd)
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 02dee8e8d958..44a75ccc8e08 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -108,11 +108,12 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
  */
 static const size_t iommufd_hwpt_info_size[] = {
 	[IOMMU_PGTBL_DATA_NONE] = 0,
+	[IOMMU_PGTBL_DATA_VTD_S1] = sizeof(struct iommu_hwpt_intel_vtd),
 };
 
 int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 {
-	struct iommufd_hw_pagetable *hwpt;
+	struct iommufd_hw_pagetable *hwpt, *parent = NULL;
 	struct iommu_hwpt_alloc *cmd = ucmd->cmd;
 	struct iommufd_ctx *ictx = ucmd->ictx;
 	struct iommufd_object *pt_obj = NULL;
@@ -160,6 +161,19 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 	}
 
 	switch (pt_obj->type) {
+	case IOMMUFD_OBJ_HW_PAGETABLE:
+		parent = container_of(pt_obj, struct iommufd_hw_pagetable, obj);
+		/*
+		 * Cannot allocate user-managed hwpt linking to auto_created
+		 * hwpt. If the parent hwpt is already a user-managed hwpt,
+		 * don't allocate another user-managed hwpt linking to it.
+		 */
+		if (parent->auto_domain || parent->parent) {
+			rc = -EINVAL;
+			goto out_put_pt;
+		}
+		ioas = parent->ioas;
+		break;
 	case IOMMUFD_OBJ_IOAS:
 		ioas = container_of(pt_obj, struct iommufd_ioas, obj);
 		break;
@@ -189,7 +203,7 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 	}
 
 	mutex_lock(&ioas->mutex);
-	hwpt = __iommufd_hw_pagetable_alloc(ictx, ioas, dev, NULL, data);
+	hwpt = __iommufd_hw_pagetable_alloc(ictx, ioas, dev, parent, data);
 	mutex_unlock(&ioas->mutex);
 	if (IS_ERR(hwpt)) {
 		rc = PTR_ERR(hwpt);
@@ -217,3 +231,56 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 	iommufd_put_object(dev_obj);
 	return rc;
 }
+
+static u32 iommufd_hwpt_invalidate_info_size[] = {
+	[IOMMU_PGTBL_DATA_VTD_S1] = sizeof(struct iommu_hwpt_invalidate_intel_vtd),
+};
+
+int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
+	struct iommufd_hw_pagetable *hwpt;
+	u64 user_ptr;
+	u32 user_data_len, klen;
+	int rc = 0;
+
+	/*
+	 * No invalidation needed for type==IOMMU_PGTBL_DATA_NONE.
+	 * data_len should not exceed the size of iommufd_invalidate_buffer.
+	 */
+	if (cmd->data_type == IOMMU_PGTBL_DATA_NONE || !cmd->data_len)
+		return -EOPNOTSUPP;
+
+	hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
+	if (IS_ERR(hwpt))
+		return PTR_ERR(hwpt);
+
+	/* Do not allow any kernel-managed hw_pagetable */
+	if (!hwpt->parent) {
+		rc = -EINVAL;
+		goto out_put_hwpt;
+	}
+
+	klen = iommufd_hwpt_invalidate_info_size[cmd->data_type];
+	if (!klen) {
+		rc = -EINVAL;
+		goto out_put_hwpt;
+	}
+
+	/*
+	 * copy the needed fields before reusing the ucmd buffer, this
+	 * avoids memory allocation in this path.
+	 */
+	user_ptr = cmd->data_uptr;
+	user_data_len = cmd->data_len;
+
+	rc = copy_struct_from_user(cmd, klen,
+				   u64_to_user_ptr(user_ptr), user_data_len);
+	if (rc)
+		goto out_put_hwpt;
+
+	hwpt->domain->ops->iotlb_sync_user(hwpt->domain, cmd);
+out_put_hwpt:
+	iommufd_put_object(&hwpt->obj);
+	return rc;
+}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 5ef034451f4b..bb341e633c18 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -267,6 +267,7 @@ iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
 }
 
 int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
+int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
 
 struct device *iommufd_obj_dev(struct iommufd_object *obj);
 
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 831303d64abe..6e2d8805daf3 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -252,6 +252,12 @@ union ucmd_buffer {
 	struct iommu_destroy destroy;
 	struct iommu_device_info info;
 	struct iommu_hwpt_alloc hwpt;
+	struct iommu_hwpt_invalidate cache;
+	/*
+	 * data_type specific structure used in the cache invalidation
+	 * path.
+	 */
+	struct iommu_hwpt_invalidate_intel_vtd vtd;
 	struct iommu_ioas_alloc alloc;
 	struct iommu_ioas_allow_iovas allow_iovas;
 	struct iommu_ioas_copy ioas_copy;
@@ -287,6 +293,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 __reserved),
 	IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
 		 __reserved),
+	IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate,
+		 struct iommu_hwpt_invalidate, data_uptr),
 	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
 		 struct iommu_ioas_alloc, out_ioas_id),
 	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index f501add5ffe9..cb6a9ee215f4 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -47,6 +47,7 @@ enum {
 	IOMMUFD_CMD_VFIO_IOAS,
 	IOMMUFD_CMD_DEVICE_GET_INFO,
 	IOMMUFD_CMD_HWPT_ALLOC,
+	IOMMUFD_CMD_HWPT_INVALIDATE,
 };
 
 /**
@@ -377,9 +378,11 @@ struct iommu_device_info_vtd {
 /**
  * enum iommu_pgtbl_data_type - IOMMU Page Table User Data type
  * @IOMMU_PGTBL_DATA_NONE: no user data
+ * @IOMMU_PGTBL_DATA_VTD_S1: Data for Intel VT-d stage-1 page table
  */
 enum iommu_pgtbl_data_type {
 	IOMMU_PGTBL_DATA_NONE,
+	IOMMU_PGTBL_DATA_VTD_S1,
 };
 
 /**
@@ -495,6 +498,8 @@ struct iommu_hwpt_intel_vtd {
  * +------------------------------+-------------------------------------+
  * | IOMMU_PGTBL_DATA_NONE        |                 N/A                 |
  * +------------------------------+-------------------------------------+
+ * | IOMMU_PGTBL_DATA_VTD_S1      |      struct iommu_hwpt_intel_vtd    |
+ * +------------------------------+-------------------------------------+
  */
 struct iommu_hwpt_alloc {
 	__u32 size;
@@ -562,4 +567,33 @@ struct iommu_hwpt_invalidate_intel_vtd {
 	__u64 granule_size;
 	__u64 nb_granules;
 };
+
+/**
+ * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
+ * @size: sizeof(struct iommu_hwpt_invalidate)
+ * @hwpt_id: HWPT ID of target hardware page table for the invalidation
+ * @data_type: One of enum iommu_pgtbl_data_type
+ * @data_len: Length of the type specific data
+ * @data_uptr: User pointer to the type specific data
+ *
+ * Invalidate the iommu cache for user-managed page table. Modifications
+ * on user-managed page table should be followed with this operation to
+ * sync the userspace with the kernel and underlying hardware. This operation
+ * is only needed by user-managed hw_pagetables, so the @data_type should
+ * never be IOMMU_PGTBL_DATA_NONE.
+ *
+ * +==============================+========================================+
+ * | @data_type                   |     Data structure in @data_uptr       |
+ * +------------------------------+----------------------------------------+
+ * | IOMMU_PGTBL_DATA_VTD_S1      | struct iommu_hwpt_invalidate_intel_vtd |
+ * +------------------------------+----------------------------------------+
+ */
+struct iommu_hwpt_invalidate {
+	__u32 size;
+	__u32 hwpt_id;
+	__u32 data_type;
+	__u32 data_len;
+	__aligned_u64 data_uptr;
+};
+#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
 #endif
-- 
2.34.1


  parent reply	other threads:[~2023-02-09  4:36 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-09  4:31 [PATCH 00/17] Add Intel VT-d nested translation Yi Liu
2023-02-09  4:31 ` [PATCH 01/17] iommu: Add new iommu op to create domains owned by userspace Yi Liu
2023-02-10  8:24   ` Tian, Kevin
2023-02-11  3:16     ` Baolu Lu
2023-02-09  4:31 ` [PATCH 02/17] iommu: Add nested domain support Yi Liu
2023-02-14 18:47   ` Nicolin Chen
2023-02-09  4:31 ` [PATCH 03/17] iommu/vt-d: Extend dmar_domain to support nested domain Yi Liu
2023-02-09  4:31 ` [PATCH 04/17] iommu/vt-d: Add helper to setup pasid nested translation Yi Liu
2023-02-09  4:31 ` [PATCH 05/17] iommu/vt-d: Add nested domain support Yi Liu
2023-02-09  4:31 ` [PATCH 06/17] iommufd/hw_pagetable: Use domain_alloc_user op for domain allocation Yi Liu
2023-02-09 17:59   ` Matthew Rosato
2023-02-09 18:36     ` Jason Gunthorpe
2023-02-09 19:51       ` Nicolin Chen
2023-02-09 20:39         ` Jason Gunthorpe
2023-02-09 22:22           ` Nicolin Chen
2023-02-09 23:59             ` Jason Gunthorpe
2023-02-10 10:50             ` Liu, Yi L
2023-02-09  4:31 ` [PATCH 07/17] iommufd: Add/del hwpt to IOAS at alloc/destroy() Yi Liu
2023-02-09  4:31 ` [PATCH 08/17] iommufd: Split iommufd_hw_pagetable_alloc() Yi Liu
2023-02-09  4:31 ` [PATCH 09/17] iommufd: Add kernel-managed hw_pagetable allocation for userspace Yi Liu
2023-02-09 20:45   ` Jason Gunthorpe
2023-02-10 10:52     ` Liu, Yi L
2023-02-09  4:31 ` [PATCH 10/17] iommufd/device: Move IOAS attaching and detaching operations into helpers Yi Liu
2023-02-09  4:31 ` [PATCH 11/17] iommufd: Add infrastructure for user-managed hw_pagetable allocation Yi Liu
2023-02-09  4:31 ` Yi Liu [this message]
2023-02-09  4:31 ` [PATCH 13/17] iommufd/device: Report supported stage-1 page table types Yi Liu
2023-02-09  4:31 ` [PATCH 14/17] iommufd/selftest: Add IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE test op Yi Liu
2023-02-14 18:36   ` Nicolin Chen
2023-02-09  4:31 ` [PATCH 15/17] iommufd/selftest: Add coverage for IOMMU_HWPT_ALLOC ioctl Yi Liu
2023-02-09  4:31 ` [PATCH 16/17] iommufd/selftest: Add IOMMU_TEST_OP_MD_CHECK_IOTLB test op Yi Liu
2023-02-09  4:31 ` [PATCH 17/17] iommufd/selftest: Add coverage for IOMMU_HWPT_INVALIDATE ioctl Yi Liu
2023-02-09 10:11 ` [PATCH 00/17] Add Intel VT-d nested translation Shameerali Kolothum Thodi
2023-02-09 16:10   ` Nicolin Chen
2023-02-09 16:16     ` Shameerali Kolothum Thodi
2023-02-17 18:20 ` Nicolin Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230209043153.14964-13-yi.l.liu@intel.com \
    --to=yi.l.liu@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=cohuck@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=lulu@redhat.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=robin.murphy@arm.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=yi.y.sun@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.