From: Lu Baolu <baolu.lu@linux.intel.com>
To: Jason Gunthorpe <jgg@ziepe.ca>, Kevin Tian <kevin.tian@intel.com>,
Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
Robin Murphy <robin.murphy@arm.com>,
Jean-Philippe Brucker <jean-philippe@linaro.org>,
Nicolin Chen <nicolinc@nvidia.com>, Yi Liu <yi.l.liu@intel.com>,
Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: iommu@lists.linux.dev, linux-kselftest@vger.kernel.org,
virtualization@lists.linux-foundation.org,
linux-kernel@vger.kernel.org, Lu Baolu <baolu.lu@linux.intel.com>
Subject: [RFC PATCHES 08/17] iommufd: IO page fault delivery initialization and release
Date: Tue, 30 May 2023 13:37:15 +0800 [thread overview]
Message-ID: <20230530053724.232765-9-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20230530053724.232765-1-baolu.lu@linux.intel.com>
Add some housekeeping code for IO page fault delivery. Add a fault field
in the iommufd_hw_pagetable structure to store pending IO page faults and
other related data.
The fault field is allocated when an IOPF-capable user HWPT (indicated by
IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE being set in the allocation user data)
is allocated. This field exists until the HWPT is destroyed. This also
implies that it is possible to determine whether a HWPT is IOPF capable by
checking the fault field.
When an IOPF-capable HWPT is attached to a device (could also be a PASID of
a device in the future), a fault cookie is allocated and set to the device.
The cookie is cleared and freed when the HWPT is detached from the device.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/iommufd/iommufd_private.h | 12 +++++
drivers/iommu/iommufd/device.c | 61 +++++++++++++++++++++++--
drivers/iommu/iommufd/hw_pagetable.c | 55 ++++++++++++++++++++++
3 files changed, 125 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index e951815f5707..5ff139acc5c0 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -236,6 +236,13 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
+/*
+ * IO page fault data of a HWPT, referenced by iommufd_hw_pagetable::fault.
+ * Created by hw_pagetable_fault_alloc() and released by
+ * hw_pagetable_fault_free().
+ */
+struct hw_pgtable_fault {
+ /* NOTE(review): presumably serializes the two lists below; no user in this patch - confirm */
+ struct mutex mutex;
+ /* Expected to be empty when the structure is freed (WARN_ON otherwise) */
+ struct list_head deliver;
+ /* Expected to be empty when the structure is freed (WARN_ON otherwise) */
+ struct list_head response;
+ /* eventfd context looked up from the fd given at allocation; put on free */
+ struct eventfd_ctx *trigger;
+};
+
/*
* A HW pagetable is called an iommu_domain inside the kernel. This user object
* allows directly creating and inspecting the domains. Domains that have kernel
@@ -252,6 +259,7 @@ struct iommufd_hw_pagetable {
bool msi_cookie : 1;
/* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item;
+ struct hw_pgtable_fault *fault;
};
struct iommufd_hw_pagetable *
@@ -314,6 +322,10 @@ struct iommufd_device {
bool has_user_data;
};
+/*
+ * Cookie handed to iommu_set_device_fault_cookie() when a HWPT that has
+ * fault data is attached to a device.
+ */
+struct iommufd_fault_cookie {
+ /* The device the cookie was set for */
+ struct iommufd_device *idev;
+};
+
static inline struct iommufd_device *
iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
{
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 29b212714e2c..3408f1fc3e9f 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -374,6 +374,44 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
return 0;
}
+/*
+ * Allocate a fault cookie pointing at @idev and install it for @idev/@pasid.
+ * Does nothing for a @hwpt without fault data.
+ *
+ * Returns 0 on success (or when no cookie is needed), -ENOMEM if the cookie
+ * cannot be allocated, or the error reported by
+ * iommu_set_device_fault_cookie().
+ */
+static int iommufd_device_set_fault_cookie(struct iommufd_hw_pagetable *hwpt,
+					   struct iommufd_device *idev,
+					   ioasid_t pasid)
+{
+	struct iommufd_fault_cookie *new_cookie, *old_cookie;
+	int rc = 0;
+
+	if (!hwpt->fault)
+		return 0;
+
+	new_cookie = kzalloc(sizeof(*new_cookie), GFP_KERNEL);
+	if (!new_cookie)
+		return -ENOMEM;
+	new_cookie->idev = idev;
+
+	/*
+	 * NOTE(review): the helper appears to hand back the cookie that was
+	 * installed before (it is freed below) - confirm against its definition.
+	 */
+	old_cookie = iommu_set_device_fault_cookie(idev->dev, pasid, new_cookie);
+	if (IS_ERR(old_cookie)) {
+		/* Installation failed, so the new cookie is still ours to free. */
+		rc = PTR_ERR(old_cookie);
+		kfree(new_cookie);
+	} else {
+		kfree(old_cookie);
+	}
+
+	return rc;
+}
+
+/*
+ * Clear the fault cookie installed for @idev/@pasid and free it. Does nothing
+ * for a @hwpt without fault data.
+ */
+static void iommufd_device_unset_fault_cookie(struct iommufd_hw_pagetable *hwpt,
+					      struct iommufd_device *idev,
+					      ioasid_t pasid)
+{
+	struct iommufd_fault_cookie *curr;
+
+	if (!hwpt->fault)
+		return;
+
+	curr = iommu_set_device_fault_cookie(idev->dev, pasid, NULL);
+	/* An ERR_PTR() value is not an allocation and must never reach kfree(). */
+	if (WARN_ON(IS_ERR(curr)))
+		return;
+	kfree(curr);
+}
+
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev)
{
@@ -398,6 +436,10 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
if (rc)
goto err_unlock;
+ rc = iommufd_device_set_fault_cookie(hwpt, idev, 0);
+ if (rc)
+ goto err_unresv;
+
/*
* Only attach to the group once for the first device that is in the
* group. All the other devices will follow this attachment. The user
@@ -408,17 +450,21 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
if (list_empty(&idev->igroup->device_list)) {
rc = iommufd_group_setup_msi(idev->igroup, hwpt);
if (rc)
- goto err_unresv;
+ goto err_unset;
rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
if (rc)
- goto err_unresv;
+ goto err_unset;
idev->igroup->hwpt = hwpt;
}
+
refcount_inc(&hwpt->obj.users);
list_add_tail(&idev->group_item, &idev->igroup->device_list);
mutex_unlock(&idev->igroup->lock);
return 0;
+
+err_unset:
+ iommufd_device_unset_fault_cookie(hwpt, idev, 0);
err_unresv:
iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
err_unlock:
@@ -433,6 +479,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
mutex_lock(&idev->igroup->lock);
list_del(&idev->group_item);
+ iommufd_device_unset_fault_cookie(hwpt, idev, 0);
if (list_empty(&idev->igroup->device_list)) {
iommu_detach_group(hwpt->domain, idev->igroup->group);
idev->igroup->hwpt = NULL;
@@ -502,9 +549,14 @@ iommufd_device_do_replace(struct iommufd_device *idev,
if (rc)
goto err_unresv;
+ iommufd_device_unset_fault_cookie(old_hwpt, idev, 0);
+ rc = iommufd_device_set_fault_cookie(hwpt, idev, 0);
+ if (rc)
+ goto err_unresv;
+
rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
if (rc)
- goto err_unresv;
+ goto err_replace;
if (hwpt->ioas != old_hwpt->ioas) {
list_for_each_entry(cur, &igroup->device_list, group_item)
@@ -526,6 +578,9 @@ iommufd_device_do_replace(struct iommufd_device *idev,
/* Caller must destroy old_hwpt */
return old_hwpt;
+err_replace:
+ iommufd_device_unset_fault_cookie(hwpt, idev, 0);
+ iommufd_device_set_fault_cookie(old_hwpt, idev, 0);
err_unresv:
list_for_each_entry(cur, &igroup->device_list, group_item)
iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 47ec7ddd5f5d..d6d550c3d0cc 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -3,12 +3,16 @@
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
*/
#include <linux/iommu.h>
+#include <linux/eventfd.h>
#include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"
#include "iommufd_private.h"
#include "iommufd_test.h"
+static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd);
+static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault);
+
void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
{
struct iommufd_hw_pagetable *hwpt =
@@ -27,6 +31,9 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
if (hwpt->parent)
refcount_dec(&hwpt->parent->obj.users);
+
+ if (hwpt->fault)
+ hw_pagetable_fault_free(hwpt->fault);
refcount_dec(&hwpt->ioas->obj.users);
}
@@ -255,6 +262,11 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
goto out_put_pt;
}
+ if (!parent && (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE)) {
+ rc = -EINVAL;
+ goto out_put_pt;
+ }
+
if (klen) {
if (!cmd->data_len) {
rc = -EINVAL;
@@ -282,6 +294,14 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
goto out_unlock;
}
+ if (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE) {
+ hwpt->fault = hw_pagetable_fault_alloc(cmd->event_fd);
+ if (IS_ERR(hwpt->fault)) {
+ rc = PTR_ERR(hwpt->fault);
+ goto out_hwpt;
+ }
+ }
+
cmd->out_hwpt_id = hwpt->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
@@ -346,3 +366,38 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
iommufd_put_object(&hwpt->obj);
return rc;
}
+
+/*
+ * Allocate and initialise the fault data of a HWPT.
+ *
+ * @eventfd: file descriptor that is resolved with eventfd_ctx_fdget() and
+ *           stored as the trigger.
+ *
+ * Returns the new structure, ERR_PTR(-ENOMEM) if the allocation fails, or the
+ * error from eventfd_ctx_fdget() as an ERR_PTR().
+ */
+static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd)
+{
+	struct hw_pgtable_fault *new_fault;
+	struct eventfd_ctx *ctx;
+
+	new_fault = kzalloc(sizeof(*new_fault), GFP_KERNEL);
+	if (!new_fault)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&new_fault->deliver);
+	INIT_LIST_HEAD(&new_fault->response);
+	mutex_init(&new_fault->mutex);
+
+	ctx = eventfd_ctx_fdget(eventfd);
+	new_fault->trigger = ctx;
+	if (IS_ERR(ctx)) {
+		int rc = PTR_ERR(ctx);
+
+		/* Nothing else holds the structure yet; drop it and report why. */
+		kfree(new_fault);
+		return ERR_PTR(rc);
+	}
+
+	return new_fault;
+}
+
+/*
+ * Release fault data obtained from hw_pagetable_fault_alloc(): drop the
+ * eventfd context reference and free the structure. Both fault lists are
+ * expected to be empty by now; a non-empty list triggers a warning.
+ *
+ * NULL and ERR_PTR() values are ignored.
+ */
+static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault)
+{
+	/*
+	 * iommufd_hwpt_alloc() stores the ERR_PTR() of a failed allocation in
+	 * hwpt->fault before bailing out, and iommufd_hw_pagetable_destroy()
+	 * only tests that field for NULL, so an error pointer can end up here
+	 * if the error path destroys the HWPT. Never dereference it.
+	 */
+	if (IS_ERR_OR_NULL(fault))
+		return;
+
+	WARN_ON(!list_empty(&fault->deliver));
+	WARN_ON(!list_empty(&fault->response));
+
+	eventfd_ctx_put(fault->trigger);
+	kfree(fault);
+}
--
2.34.1
next prev parent reply other threads:[~2023-05-30 5:38 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-30 5:37 [RFC PATCHES 00/17] IOMMUFD: Deliver IO page faults to user space Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 01/17] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 02/17] iommu: Support asynchronous I/O page fault response Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 03/17] iommu: Add helper to set iopf handler for domain Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 04/17] iommu: Pass device parameter to iopf handler Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 05/17] iommu: Split IO page fault handling from SVA Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 06/17] iommu: Add iommu page fault cookie helpers Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 07/17] iommufd: Add iommu page fault data Lu Baolu
2023-05-30 5:37 ` Lu Baolu [this message]
2023-05-30 5:37 ` [RFC PATCHES 09/17] iommufd: Add iommufd hwpt iopf handler Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 10/17] iommufd: Add IOMMU_HWPT_ALLOC_FLAGS_USER_PASID_TABLE for hwpt_alloc Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 11/17] iommufd: Deliver fault messages to user space Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 12/17] iommufd: Add io page fault response support Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 13/17] iommufd: Add a timer for each iommufd fault data Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 14/17] iommufd: Drain all pending faults when destroying hwpt Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 15/17] iommufd: Allow new hwpt_alloc flags Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 16/17] iommufd/selftest: Add IOPF feature for mock devices Lu Baolu
2023-05-30 5:37 ` [RFC PATCHES 17/17] iommufd/selftest: Cover iopf-capable nested hwpt Lu Baolu
2023-05-30 18:50 ` [RFC PATCHES 00/17] IOMMUFD: Deliver IO page faults to user space Nicolin Chen
2023-05-31 2:10 ` Baolu Lu
2023-05-31 4:12 ` Nicolin Chen
2023-06-25 6:30 ` Baolu Lu
2023-06-25 19:21 ` Nicolin Chen
2023-06-26 3:10 ` Baolu Lu
2023-06-26 18:02 ` Nicolin Chen
2023-06-26 18:33 ` Jason Gunthorpe
2023-06-28 2:00 ` Baolu Lu
2023-06-28 12:49 ` Jason Gunthorpe
2023-06-29 1:07 ` Baolu Lu
2023-05-31 0:33 ` Jason Gunthorpe
2023-05-31 3:17 ` Baolu Lu
2023-06-23 6:18 ` Baolu Lu
2023-06-23 13:50 ` Jason Gunthorpe
2023-06-16 11:32 ` Jean-Philippe Brucker
2023-06-19 3:35 ` Baolu Lu
2023-06-26 9:51 ` Jean-Philippe Brucker
2023-06-19 12:58 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230530053724.232765-9-baolu.lu@linux.intel.com \
--to=baolu.lu@linux.intel.com \
--cc=iommu@lists.linux.dev \
--cc=jacob.jun.pan@linux.intel.com \
--cc=jean-philippe@linaro.org \
--cc=jgg@ziepe.ca \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=nicolinc@nvidia.com \
--cc=robin.murphy@arm.com \
--cc=virtualization@lists.linux-foundation.org \
--cc=will@kernel.org \
--cc=yi.l.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).