iommu.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Lu Baolu <baolu.lu@linux.intel.com>
To: Jason Gunthorpe <jgg@ziepe.ca>, Kevin Tian <kevin.tian@intel.com>,
	Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Jean-Philippe Brucker <jean-philippe@linaro.org>,
	Nicolin Chen <nicolinc@nvidia.com>, Yi Liu <yi.l.liu@intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: iommu@lists.linux.dev, linux-kselftest@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, Lu Baolu <baolu.lu@linux.intel.com>
Subject: [RFC PATCHES 08/17] iommufd: IO page fault delivery initialization and release
Date: Tue, 30 May 2023 13:37:15 +0800	[thread overview]
Message-ID: <20230530053724.232765-9-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20230530053724.232765-1-baolu.lu@linux.intel.com>

Add some housekeeping code for IO page fault dilivery. Add a fault field
in the iommufd_hw_pagetable structure to store pending IO page faults and
other related data.

The fault field is allocated when an IOPF-capable user HWPT (indicated by
IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE being set in the allocation user data)
is allocated. This field exists until the HWPT is destroyed. This also
implies that it is possible to determine whether a HWPT is IOPF capable by
checking the fault field.

When an IOPF-capable HWPT is attached to a device (could also be a PASID of
a device in the future), a fault cookie is allocated and set to the device.
The cookie is cleared and freed when HWPT is detached from the device.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/iommufd/iommufd_private.h | 12 +++++
 drivers/iommu/iommufd/device.c          | 61 +++++++++++++++++++++++--
 drivers/iommu/iommufd/hw_pagetable.c    | 55 ++++++++++++++++++++++
 3 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index e951815f5707..5ff139acc5c0 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -236,6 +236,13 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
 
 int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
 
+struct hw_pgtable_fault {
+	struct mutex mutex;
+	struct list_head deliver;
+	struct list_head response;
+	struct eventfd_ctx *trigger;
+};
+
 /*
  * A HW pagetable is called an iommu_domain inside the kernel. This user object
  * allows directly creating and inspecting the domains. Domains that have kernel
@@ -252,6 +259,7 @@ struct iommufd_hw_pagetable {
 	bool msi_cookie : 1;
 	/* Head at iommufd_ioas::hwpt_list */
 	struct list_head hwpt_item;
+	struct hw_pgtable_fault *fault;
 };
 
 struct iommufd_hw_pagetable *
@@ -314,6 +322,10 @@ struct iommufd_device {
 	bool has_user_data;
 };
 
+struct iommufd_fault_cookie {
+	struct iommufd_device *idev;
+};
+
 static inline struct iommufd_device *
 iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
 {
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 29b212714e2c..3408f1fc3e9f 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -374,6 +374,44 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
 	return 0;
 }
 
+static int iommufd_device_set_fault_cookie(struct iommufd_hw_pagetable *hwpt,
+					   struct iommufd_device *idev,
+					   ioasid_t pasid)
+{
+	struct iommufd_fault_cookie *fcookie, *curr;
+
+	if (!hwpt->fault)
+		return 0;
+
+	fcookie = kzalloc(sizeof(*fcookie), GFP_KERNEL);
+	if (!fcookie)
+		return -ENOMEM;
+	fcookie->idev = idev;
+
+	curr = iommu_set_device_fault_cookie(idev->dev, pasid, fcookie);
+	if (IS_ERR(curr)) {
+		kfree(fcookie);
+		return PTR_ERR(curr);
+	}
+	kfree(curr);
+
+	return 0;
+}
+
+static void iommufd_device_unset_fault_cookie(struct iommufd_hw_pagetable *hwpt,
+					      struct iommufd_device *idev,
+					      ioasid_t pasid)
+{
+	struct iommufd_fault_cookie *curr;
+
+	if (!hwpt->fault)
+		return;
+
+	curr = iommu_set_device_fault_cookie(idev->dev, pasid, NULL);
+	WARN_ON(IS_ERR(curr));
+	kfree(curr);
+}
+
 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 				struct iommufd_device *idev)
 {
@@ -398,6 +436,10 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 	if (rc)
 		goto err_unlock;
 
+	rc = iommufd_device_set_fault_cookie(hwpt, idev, 0);
+	if (rc)
+		goto err_unresv;
+
 	/*
 	 * Only attach to the group once for the first device that is in the
 	 * group. All the other devices will follow this attachment. The user
@@ -408,17 +450,21 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 	if (list_empty(&idev->igroup->device_list)) {
 		rc = iommufd_group_setup_msi(idev->igroup, hwpt);
 		if (rc)
-			goto err_unresv;
+			goto err_unset;
 
 		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
 		if (rc)
-			goto err_unresv;
+			goto err_unset;
 		idev->igroup->hwpt = hwpt;
 	}
+
 	refcount_inc(&hwpt->obj.users);
 	list_add_tail(&idev->group_item, &idev->igroup->device_list);
 	mutex_unlock(&idev->igroup->lock);
 	return 0;
+
+err_unset:
+	iommufd_device_unset_fault_cookie(hwpt, idev, 0);
 err_unresv:
 	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
 err_unlock:
@@ -433,6 +479,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
 
 	mutex_lock(&idev->igroup->lock);
 	list_del(&idev->group_item);
+	iommufd_device_unset_fault_cookie(hwpt, idev, 0);
 	if (list_empty(&idev->igroup->device_list)) {
 		iommu_detach_group(hwpt->domain, idev->igroup->group);
 		idev->igroup->hwpt = NULL;
@@ -502,9 +549,14 @@ iommufd_device_do_replace(struct iommufd_device *idev,
 	if (rc)
 		goto err_unresv;
 
+	iommufd_device_unset_fault_cookie(old_hwpt, idev, 0);
+	rc = iommufd_device_set_fault_cookie(hwpt, idev, 0);
+	if (rc)
+		goto err_unresv;
+
 	rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
 	if (rc)
-		goto err_unresv;
+		goto err_replace;
 
 	if (hwpt->ioas != old_hwpt->ioas) {
 		list_for_each_entry(cur, &igroup->device_list, group_item)
@@ -526,6 +578,9 @@ iommufd_device_do_replace(struct iommufd_device *idev,
 
 	/* Caller must destroy old_hwpt */
 	return old_hwpt;
+err_replace:
+	iommufd_device_unset_fault_cookie(hwpt, idev, 0);
+	iommufd_device_set_fault_cookie(old_hwpt, idev, 0);
 err_unresv:
 	list_for_each_entry(cur, &igroup->device_list, group_item)
 		iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 47ec7ddd5f5d..d6d550c3d0cc 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -3,12 +3,16 @@
  * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
  */
 #include <linux/iommu.h>
+#include <linux/eventfd.h>
 #include <uapi/linux/iommufd.h>
 
 #include "../iommu-priv.h"
 #include "iommufd_private.h"
 #include "iommufd_test.h"
 
+static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd);
+static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault);
+
 void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 {
 	struct iommufd_hw_pagetable *hwpt =
@@ -27,6 +31,9 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
 
 	if (hwpt->parent)
 		refcount_dec(&hwpt->parent->obj.users);
+
+	if (hwpt->fault)
+		hw_pagetable_fault_free(hwpt->fault);
 	refcount_dec(&hwpt->ioas->obj.users);
 }
 
@@ -255,6 +262,11 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 		goto out_put_pt;
 	}
 
+	if (!parent && (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE)) {
+		rc = -EINVAL;
+		goto out_put_pt;
+	}
+
 	if (klen) {
 		if (!cmd->data_len) {
 			rc = -EINVAL;
@@ -282,6 +294,14 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 		goto out_unlock;
 	}
 
+	if (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE) {
+		hwpt->fault = hw_pagetable_fault_alloc(cmd->event_fd);
+		if (IS_ERR(hwpt->fault)) {
+			rc = PTR_ERR(hwpt->fault);
+			goto out_hwpt;
+		}
+	}
+
 	cmd->out_hwpt_id = hwpt->obj.id;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 	if (rc)
@@ -346,3 +366,38 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
 	iommufd_put_object(&hwpt->obj);
 	return rc;
 }
+
+static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd)
+{
+	struct hw_pgtable_fault *fault;
+	int rc;
+
+	fault = kzalloc(sizeof(*fault), GFP_KERNEL);
+	if (!fault)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&fault->deliver);
+	INIT_LIST_HEAD(&fault->response);
+	mutex_init(&fault->mutex);
+
+	fault->trigger = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(fault->trigger)) {
+		rc = PTR_ERR(fault->trigger);
+		goto out_free;
+	}
+
+	return fault;
+
+out_free:
+	kfree(fault);
+	return ERR_PTR(rc);
+}
+
+static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault)
+{
+	WARN_ON(!list_empty(&fault->deliver));
+	WARN_ON(!list_empty(&fault->response));
+
+	eventfd_ctx_put(fault->trigger);
+	kfree(fault);
+}
-- 
2.34.1


  parent reply	other threads:[~2023-05-30  5:38 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-30  5:37 [RFC PATCHES 00/17] IOMMUFD: Deliver IO page faults to user space Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 01/17] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 02/17] iommu: Support asynchronous I/O page fault response Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 03/17] iommu: Add helper to set iopf handler for domain Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 04/17] iommu: Pass device parameter to iopf handler Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 05/17] iommu: Split IO page fault handling from SVA Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 06/17] iommu: Add iommu page fault cookie helpers Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 07/17] iommufd: Add iommu page fault data Lu Baolu
2023-05-30  5:37 ` Lu Baolu [this message]
2023-05-30  5:37 ` [RFC PATCHES 09/17] iommufd: Add iommufd hwpt iopf handler Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 10/17] iommufd: Add IOMMU_HWPT_ALLOC_FLAGS_USER_PASID_TABLE for hwpt_alloc Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 11/17] iommufd: Deliver fault messages to user space Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 12/17] iommufd: Add io page fault response support Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 13/17] iommufd: Add a timer for each iommufd fault data Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 14/17] iommufd: Drain all pending faults when destroying hwpt Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 15/17] iommufd: Allow new hwpt_alloc flags Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 16/17] iommufd/selftest: Add IOPF feature for mock devices Lu Baolu
2023-05-30  5:37 ` [RFC PATCHES 17/17] iommufd/selftest: Cover iopf-capable nested hwpt Lu Baolu
2023-05-30 18:50 ` [RFC PATCHES 00/17] IOMMUFD: Deliver IO page faults to user space Nicolin Chen
2023-05-31  2:10   ` Baolu Lu
2023-05-31  4:12     ` Nicolin Chen
2023-06-25  6:30   ` Baolu Lu
2023-06-25 19:21     ` Nicolin Chen
2023-06-26  3:10       ` Baolu Lu
2023-06-26 18:02         ` Nicolin Chen
2023-06-26 18:33     ` Jason Gunthorpe
2023-06-28  2:00       ` Baolu Lu
2023-06-28 12:49         ` Jason Gunthorpe
2023-06-29  1:07           ` Baolu Lu
2023-05-31  0:33 ` Jason Gunthorpe
2023-05-31  3:17   ` Baolu Lu
2023-06-23  6:18   ` Baolu Lu
2023-06-23 13:50     ` Jason Gunthorpe
2023-06-16 11:32 ` Jean-Philippe Brucker
2023-06-19  3:35   ` Baolu Lu
2023-06-26  9:51     ` Jean-Philippe Brucker
2023-06-19 12:58   ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230530053724.232765-9-baolu.lu@linux.intel.com \
    --to=baolu.lu@linux.intel.com \
    --cc=iommu@lists.linux.dev \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=robin.murphy@arm.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).