All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lu Baolu <baolu.lu@linux.intel.com>
To: Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Jason Gunthorpe <jgg@ziepe.ca>, Kevin Tian <kevin.tian@intel.com>,
	Jean-Philippe Brucker <jean-philippe@linaro.org>,
	Nicolin Chen <nicolinc@nvidia.com>
Cc: Yi Liu <yi.l.liu@intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	Longfang Liu <liulongfang@huawei.com>,
	Yan Zhao <yan.y.zhao@intel.com>,
	iommu@lists.linux.dev, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, Lu Baolu <baolu.lu@linux.intel.com>,
	Jason Gunthorpe <jgg@nvidia.com>
Subject: [PATCH v9 12/14] iommu: Use refcount for fault data access
Date: Wed, 20 Dec 2023 09:23:30 +0800	[thread overview]
Message-ID: <20231220012332.168188-13-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20231220012332.168188-1-baolu.lu@linux.intel.com>

The per-device fault data structure stores information about faults
occurring on a device. Its lifetime spans from IOPF enablement to
disablement. Multiple paths, including IOPF reporting, handling, and
responding, may access it concurrently.

Previously, a mutex protected the fault data from use after free. But
this is not performance friendly due to the critical nature of IOPF
handling paths.

Refine this with a refcount-based approach. The fault data pointer is
obtained within an RCU read region with a refcount. The fault data
pointer is returned for usage only when the pointer is valid and a
refcount is successfully obtained. The fault data is freed with
kfree_rcu(), ensuring data is only freed after all RCU critical regions
complete.

An iopf handling work starts once an iopf group is created. The handling
work continues until iommu_page_response() is called to respond to the
iopf and the iopf group is freed. During this time, the device fault
parameter should always be available. Add a pointer to the device fault
parameter in the iopf_group structure and hold the reference until the
iopf_group is freed.

Make iommu_page_response() static as it is only used in io-pgfault.c.

Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Yan Zhao <yan.y.zhao@intel.com>
---
 include/linux/iommu.h      |  17 +++--
 drivers/iommu/io-pgfault.c | 129 +++++++++++++++++++++++--------------
 drivers/iommu/iommu-sva.c  |   2 +-
 3 files changed, 90 insertions(+), 58 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index dbad2cb9eca2..c2416aa79922 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -41,6 +41,7 @@ struct iommu_dirty_ops;
 struct notifier_block;
 struct iommu_sva;
 struct iommu_dma_cookie;
+struct iommu_fault_param;
 
 #define IOMMU_FAULT_PERM_READ	(1 << 0) /* read */
 #define IOMMU_FAULT_PERM_WRITE	(1 << 1) /* write */
@@ -129,8 +130,9 @@ struct iopf_group {
 	struct iopf_fault last_fault;
 	struct list_head faults;
 	struct work_struct work;
-	struct device *dev;
 	struct iommu_domain *domain;
+	/* The device's fault data parameter. */
+	struct iommu_fault_param *fault_param;
 };
 
 /**
@@ -602,6 +604,8 @@ struct iommu_device {
 /**
  * struct iommu_fault_param - per-device IOMMU fault data
  * @lock: protect pending faults list
+ * @users: user counter to manage the lifetime of the data
+ * @ruc: rcu head for kfree_rcu()
  * @dev: the device that owns this param
  * @queue: IOPF queue
  * @queue_list: index into queue->devices
@@ -611,6 +615,8 @@ struct iommu_device {
  */
 struct iommu_fault_param {
 	struct mutex lock;
+	refcount_t users;
+	struct rcu_head rcu;
 
 	struct device *dev;
 	struct iopf_queue *queue;
@@ -638,7 +644,7 @@ struct iommu_fault_param {
  */
 struct dev_iommu {
 	struct mutex lock;
-	struct iommu_fault_param	*fault_param;
+	struct iommu_fault_param __rcu	*fault_param;
 	struct iommu_fwspec		*fwspec;
 	struct iommu_device		*iommu_dev;
 	void				*priv;
@@ -1466,7 +1472,6 @@ void iopf_queue_free(struct iopf_queue *queue);
 int iopf_queue_discard_partial(struct iopf_queue *queue);
 void iopf_free_group(struct iopf_group *group);
 int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
-int iommu_page_response(struct device *dev, struct iommu_page_response *msg);
 int iopf_group_response(struct iopf_group *group,
 			enum iommu_page_response_code status);
 #else
@@ -1511,12 +1516,6 @@ iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 	return -ENODEV;
 }
 
-static inline int
-iommu_page_response(struct device *dev, struct iommu_page_response *msg)
-{
-	return -ENODEV;
-}
-
 static inline int iopf_group_response(struct iopf_group *group,
 				      enum iommu_page_response_code status)
 {
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 5aea8402be47..3a907bad2fcb 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -13,6 +13,32 @@
 
 #include "iommu-priv.h"
 
+/*
+ * Return the fault parameter of a device if it exists. Otherwise, return NULL.
+ * On a successful return, the caller takes a reference of this parameter and
+ * should put it after use by calling iopf_put_dev_fault_param().
+ */
+static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
+{
+	struct dev_iommu *param = dev->iommu;
+	struct iommu_fault_param *fault_param;
+
+	rcu_read_lock();
+	fault_param = rcu_dereference(param->fault_param);
+	if (fault_param && !refcount_inc_not_zero(&fault_param->users))
+		fault_param = NULL;
+	rcu_read_unlock();
+
+	return fault_param;
+}
+
+/* Caller must hold a reference of the fault parameter. */
+static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
+{
+	if (refcount_dec_and_test(&fault_param->users))
+		kfree_rcu(fault_param, rcu);
+}
+
 void iopf_free_group(struct iopf_group *group)
 {
 	struct iopf_fault *iopf, *next;
@@ -22,6 +48,8 @@ void iopf_free_group(struct iopf_group *group)
 			kfree(iopf);
 	}
 
+	/* Pair with iommu_report_device_fault(). */
+	iopf_put_dev_fault_param(group->fault_param);
 	kfree(group);
 }
 EXPORT_SYMBOL_GPL(iopf_free_group);
@@ -135,7 +163,7 @@ static int iommu_handle_iopf(struct iommu_fault *fault,
 		goto cleanup_partial;
 	}
 
-	group->dev = dev;
+	group->fault_param = iopf_param;
 	group->last_fault.fault = *fault;
 	INIT_LIST_HEAD(&group->faults);
 	group->domain = domain;
@@ -178,64 +206,61 @@ static int iommu_handle_iopf(struct iommu_fault *fault,
  */
 int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
 {
+	bool last_prq = evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
+		(evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE);
 	struct iommu_fault_param *fault_param;
-	struct iopf_fault *evt_pending = NULL;
-	struct dev_iommu *param = dev->iommu;
-	int ret = 0;
+	struct iopf_fault *evt_pending;
+	int ret;
 
-	mutex_lock(&param->lock);
-	fault_param = param->fault_param;
-	if (!fault_param) {
-		mutex_unlock(&param->lock);
+	fault_param = iopf_get_dev_fault_param(dev);
+	if (!fault_param)
 		return -EINVAL;
-	}
 
 	mutex_lock(&fault_param->lock);
-	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
-	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
+	if (last_prq) {
 		evt_pending = kmemdup(evt, sizeof(struct iopf_fault),
 				      GFP_KERNEL);
 		if (!evt_pending) {
 			ret = -ENOMEM;
-			goto done_unlock;
+			goto err_unlock;
 		}
 		list_add_tail(&evt_pending->list, &fault_param->faults);
 	}
 
 	ret = iommu_handle_iopf(&evt->fault, fault_param);
-	if (ret && evt_pending) {
+	if (ret)
+		goto err_free;
+
+	mutex_unlock(&fault_param->lock);
+	/* The reference count of fault_param is now held by iopf_group. */
+	if (!last_prq)
+		iopf_put_dev_fault_param(fault_param);
+
+	return 0;
+err_free:
+	if (last_prq) {
 		list_del(&evt_pending->list);
 		kfree(evt_pending);
 	}
-done_unlock:
+err_unlock:
 	mutex_unlock(&fault_param->lock);
-	mutex_unlock(&param->lock);
+	iopf_put_dev_fault_param(fault_param);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_report_device_fault);
 
-int iommu_page_response(struct device *dev,
-			struct iommu_page_response *msg)
+static int iommu_page_response(struct iopf_group *group,
+			       struct iommu_page_response *msg)
 {
 	bool needs_pasid;
 	int ret = -EINVAL;
 	struct iopf_fault *evt;
 	struct iommu_fault_page_request *prm;
-	struct dev_iommu *param = dev->iommu;
-	struct iommu_fault_param *fault_param;
+	struct device *dev = group->fault_param->dev;
 	const struct iommu_ops *ops = dev_iommu_ops(dev);
 	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
-
-	if (!ops->page_response)
-		return -ENODEV;
-
-	mutex_lock(&param->lock);
-	fault_param = param->fault_param;
-	if (!fault_param) {
-		mutex_unlock(&param->lock);
-		return -EINVAL;
-	}
+	struct iommu_fault_param *fault_param = group->fault_param;
 
 	/* Only send response if there is a fault report pending */
 	mutex_lock(&fault_param->lock);
@@ -276,10 +301,9 @@ int iommu_page_response(struct device *dev,
 
 done_unlock:
 	mutex_unlock(&fault_param->lock);
-	mutex_unlock(&param->lock);
+
 	return ret;
 }
-EXPORT_SYMBOL_GPL(iommu_page_response);
 
 /**
  * iopf_queue_flush_dev - Ensure that all queued faults have been processed
@@ -295,22 +319,20 @@ EXPORT_SYMBOL_GPL(iommu_page_response);
  */
 int iopf_queue_flush_dev(struct device *dev)
 {
-	int ret = 0;
 	struct iommu_fault_param *iopf_param;
-	struct dev_iommu *param = dev->iommu;
 
-	if (!param)
+	/*
+	 * It's a driver bug to be here after iopf_queue_remove_device().
+	 * Therefore, it's safe to dereference the fault parameter without
+	 * holding the lock.
+	 */
+	iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
+	if (WARN_ON(!iopf_param))
 		return -ENODEV;
 
-	mutex_lock(&param->lock);
-	iopf_param = param->fault_param;
-	if (iopf_param)
-		flush_workqueue(iopf_param->queue->wq);
-	else
-		ret = -ENODEV;
-	mutex_unlock(&param->lock);
+	flush_workqueue(iopf_param->queue->wq);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
 
@@ -335,7 +357,7 @@ int iopf_group_response(struct iopf_group *group,
 	    (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
 		resp.flags = IOMMU_PAGE_RESP_PASID_VALID;
 
-	return iommu_page_response(group->dev, &resp);
+	return iommu_page_response(group, &resp);
 }
 EXPORT_SYMBOL_GPL(iopf_group_response);
 
@@ -384,10 +406,15 @@ int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
 	int ret = 0;
 	struct dev_iommu *param = dev->iommu;
 	struct iommu_fault_param *fault_param;
+	const struct iommu_ops *ops = dev_iommu_ops(dev);
+
+	if (!ops->page_response)
+		return -ENODEV;
 
 	mutex_lock(&queue->lock);
 	mutex_lock(&param->lock);
-	if (param->fault_param) {
+	if (rcu_dereference_check(param->fault_param,
+				  lockdep_is_held(&param->lock))) {
 		ret = -EBUSY;
 		goto done_unlock;
 	}
@@ -402,10 +429,12 @@ int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
 	INIT_LIST_HEAD(&fault_param->faults);
 	INIT_LIST_HEAD(&fault_param->partial);
 	fault_param->dev = dev;
+	refcount_set(&fault_param->users, 1);
+	init_rcu_head(&fault_param->rcu);
 	list_add(&fault_param->queue_list, &queue->devices);
 	fault_param->queue = queue;
 
-	param->fault_param = fault_param;
+	rcu_assign_pointer(param->fault_param, fault_param);
 
 done_unlock:
 	mutex_unlock(&param->lock);
@@ -429,10 +458,12 @@ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
 	int ret = 0;
 	struct iopf_fault *iopf, *next;
 	struct dev_iommu *param = dev->iommu;
-	struct iommu_fault_param *fault_param = param->fault_param;
+	struct iommu_fault_param *fault_param;
 
 	mutex_lock(&queue->lock);
 	mutex_lock(&param->lock);
+	fault_param = rcu_dereference_check(param->fault_param,
+					    lockdep_is_held(&param->lock));
 	if (!fault_param) {
 		ret = -ENODEV;
 		goto unlock;
@@ -454,8 +485,10 @@ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
 	list_for_each_entry_safe(iopf, next, &fault_param->partial, list)
 		kfree(iopf);
 
-	param->fault_param = NULL;
-	kfree(fault_param);
+	/* dec the ref owned by iopf_queue_add_device() */
+	rcu_assign_pointer(param->fault_param, NULL);
+	if (refcount_dec_and_test(&fault_param->users))
+		kfree_rcu(fault_param, rcu);
 unlock:
 	mutex_unlock(&param->lock);
 	mutex_unlock(&queue->lock);
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 9de878e40413..b51995b4fe90 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -251,7 +251,7 @@ static void iommu_sva_handle_iopf(struct work_struct *work)
 
 static int iommu_sva_iopf_handler(struct iopf_group *group)
 {
-	struct iommu_fault_param *fault_param = group->dev->iommu->fault_param;
+	struct iommu_fault_param *fault_param = group->fault_param;
 
 	INIT_WORK(&group->work, iommu_sva_handle_iopf);
 	if (!queue_work(fault_param->queue->wq, &group->work))
-- 
2.34.1


  parent reply	other threads:[~2023-12-20  1:29 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-20  1:23 [PATCH v9 00/14] iommu: Prepare to deliver page faults to user space Lu Baolu
2023-12-20  1:23 ` [PATCH v9 01/14] iommu: Move iommu fault data to linux/iommu.h Lu Baolu
2023-12-20  1:23 ` [PATCH v9 02/14] iommu/arm-smmu-v3: Remove unrecoverable faults reporting Lu Baolu
2023-12-20  1:23 ` [PATCH v9 03/14] iommu: Remove unrecoverable fault data Lu Baolu
2023-12-20  1:23 ` [PATCH v9 04/14] iommu: Cleanup iopf data structure definitions Lu Baolu
2023-12-20  1:23 ` [PATCH v9 05/14] iommu: Merge iopf_device_param into iommu_fault_param Lu Baolu
2023-12-20  1:23 ` [PATCH v9 06/14] iommu: Remove iommu_[un]register_device_fault_handler() Lu Baolu
2023-12-20  1:23 ` [PATCH v9 07/14] iommu: Merge iommu_fault_event and iopf_fault Lu Baolu
2023-12-20  1:23 ` [PATCH v9 08/14] iommu: Prepare for separating SVA and IOPF Lu Baolu
2023-12-20  1:23 ` [PATCH v9 09/14] iommu: Make iommu_queue_iopf() more generic Lu Baolu
2023-12-20  1:23 ` [PATCH v9 10/14] iommu: Separate SVA and IOPF Lu Baolu
2023-12-20  1:23 ` [PATCH v9 11/14] iommu: Refine locking for per-device fault data management Lu Baolu
2023-12-20  1:23 ` Lu Baolu [this message]
2024-01-05 16:09   ` [PATCH v9 12/14] iommu: Use refcount for fault data access Jason Gunthorpe
2024-01-09  2:47     ` Baolu Lu
2023-12-20  1:23 ` [PATCH v9 13/14] iommu: Improve iopf_queue_remove_device() Lu Baolu
2024-01-05 16:25   ` Jason Gunthorpe
2024-01-09  3:36     ` Baolu Lu
2023-12-20  1:23 ` [PATCH v9 14/14] iommu: Track iopf group instead of last fault Lu Baolu
2024-01-05 17:53   ` Jason Gunthorpe
2024-01-09  5:55     ` Baolu Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231220012332.168188-13-baolu.lu@linux.intel.com \
    --to=baolu.lu@linux.intel.com \
    --cc=iommu@lists.linux.dev \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liulongfang@huawei.com \
    --cc=nicolinc@nvidia.com \
    --cc=robin.murphy@arm.com \
    --cc=will@kernel.org \
    --cc=yan.y.zhao@intel.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.