From: Xie Yongji <xieyongji@bytedance.com>
To: mst@redhat.com, jasowang@redhat.com, tglx@linutronix.de, hch@lst.de
Cc: virtualization@lists.linux-foundation.org, linux-kernel@vger.kernel.org
Subject: [PATCH v3 05/11] vduse: Support automatic irq callback affinity
Date: Tue, 28 Feb 2023 17:41:04 +0800
Message-ID: <20230228094110.37-6-xieyongji@bytedance.com>
In-Reply-To: <20230228094110.37-1-xieyongji@bytedance.com>

This brings the current interrupt affinity spreading mechanism to
vduse devices. We use group_cpus_evenly() to create an irq callback
affinity mask for each virtqueue of a vduse device, and then spread
IRQs between the CPUs in the affinity mask, in a round-robin manner,
to run the irq callbacks.
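
For illustration, a minimal sketch of this round-robin selection as a
standalone model, with a hypothetical next_effective_cpu() helper and
plain bool arrays in place of cpumasks (the in-kernel version uses
cpumask_next() and cpu_online()):

/*
 * Simplified model: starting after the previously used CPU, pick the
 * next CPU that is both in the affinity mask and online, wrapping
 * around at the end of the mask. Returns -1 ("unbound") if the mask
 * contains no usable CPU.
 */
static int next_effective_cpu(int prev_cpu, const bool *in_mask,
			      const bool *online, int nr_cpus)
{
	int cpu = prev_cpu;
	int i;

	for (i = 0; i < nr_cpus; i++) {
		cpu = (cpu + 1) % nr_cpus;	/* wrap around */
		if (in_mask[cpu] && online[cpu])
			return cpu;
	}
	return -1;
}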

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 98359d87a06f..bde28a8692d5 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -23,6 +23,8 @@
 #include <linux/nospec.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/mm.h>
+#include <linux/interrupt.h>
+#include <linux/group_cpus.h>
 #include <uapi/linux/vduse.h>
 #include <uapi/linux/vdpa.h>
 #include <uapi/linux/virtio_config.h>
@@ -41,6 +43,8 @@
 #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
 
+#define IRQ_UNBOUND -1
+
 struct vduse_virtqueue {
 	u16 index;
 	u16 num_max;
@@ -57,6 +61,8 @@ struct vduse_virtqueue {
 	struct vdpa_callback cb;
 	struct work_struct inject;
 	struct work_struct kick;
+	int irq_effective_cpu;
+	struct cpumask irq_affinity;
 };
 
 struct vduse_dev;
@@ -128,6 +134,7 @@ static struct class *vduse_class;
 static struct cdev vduse_ctrl_cdev;
 static struct cdev vduse_cdev;
 static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
 
 static u32 allowed_device_id[] = {
 	VIRTIO_ID_BLOCK,
@@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
 	return dev->generation;
 }
 
+static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+{
+	affd->nr_sets = 1;
+	affd->set_size[0] = affvecs;
+}
+
+static struct cpumask *
+create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+{
+	unsigned int affvecs = 0, curvec, usedvecs, i;
+	struct cpumask *masks = NULL;
+
+	if (nvecs > affd->pre_vectors + affd->post_vectors)
+		affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+
+	if (!affd->calc_sets)
+		affd->calc_sets = default_calc_sets;
+
+	affd->calc_sets(affd, affvecs);
+
+	if (!affvecs)
+		return NULL;
+
+	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		return NULL;
+
+	/* Fill out vectors at the beginning that don't need affinity */
+	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+		cpumask_setall(&masks[curvec]);
+
+	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+		unsigned int this_vecs = affd->set_size[i];
+		int j;
+		struct cpumask *result = group_cpus_evenly(this_vecs);
+
+		if (!result) {
+			kfree(masks);
+			return NULL;
+		}
+
+		for (j = 0; j < this_vecs; j++)
+			cpumask_copy(&masks[curvec + j], &result[j]);
+		kfree(result);
+
+		curvec += this_vecs;
+		usedvecs += this_vecs;
+	}
+
+	/* Fill out vectors at the end that don't need affinity */
+	if (usedvecs >= affvecs)
+		curvec = affd->pre_vectors + affvecs;
+	else
+		curvec = affd->pre_vectors + usedvecs;
+	for (; curvec < nvecs; curvec++)
+		cpumask_setall(&masks[curvec]);
+
+	return masks;
+}
+
+static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
+					struct irq_affinity *desc)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct cpumask *masks;
+	int i;
+
+	masks = create_affinity_masks(dev->vq_num, desc);
+	if (!masks)
+		return;
+
+	for (i = 0; i < dev->vq_num; i++)
+		cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
+	kfree(masks);
+}
+
 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
 				unsigned int asid,
 				struct vhost_iotlb *iotlb)
@@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
 	.get_config		= vduse_vdpa_get_config,
 	.set_config		= vduse_vdpa_set_config,
 	.get_generation		= vduse_vdpa_get_generation,
+	.set_irq_affinity	= vduse_vdpa_set_irq_affinity,
 	.reset			= vduse_vdpa_reset,
 	.set_map		= vduse_vdpa_set_map,
 	.free			= vduse_vdpa_free,
@@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
 }
 
 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
-				    struct work_struct *irq_work)
+				    struct work_struct *irq_work,
+				    int irq_effective_cpu)
 {
 	int ret = -EINVAL;
 
@@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
 		goto unlock;
 
 	ret = 0;
-	queue_work(vduse_irq_wq, irq_work);
+	if (irq_effective_cpu == IRQ_UNBOUND)
+		queue_work(vduse_irq_wq, irq_work);
+	else
+		queue_work_on(irq_effective_cpu,
+			      vduse_irq_bound_wq, irq_work);
 unlock:
 	up_read(&dev->rwsem);
 
@@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
 	return ret;
 }
 
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+	int curr_cpu = vq->irq_effective_cpu;
+
+	while (true) {
+		/* Check bounds before cpu_online() to avoid an out-of-range CPU id */
+		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
+		if (curr_cpu >= nr_cpu_ids)
+			curr_cpu = IRQ_UNBOUND;
+		else if (cpu_online(curr_cpu))
+			break;
+	}
+
+	vq->irq_effective_cpu = curr_cpu;
+}
+
 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
@@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}
 	case VDUSE_DEV_INJECT_CONFIG_IRQ:
-		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
 		break;
 	case VDUSE_VQ_SETUP: {
 		struct vduse_vq_config config;
@@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			break;
 
 		index = array_index_nospec(index, dev->vq_num);
-		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
+
+		vduse_vq_update_effective_cpu(dev->vqs[index]);
+		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
+					dev->vqs[index]->irq_effective_cpu);
 		break;
 	}
 	case VDUSE_IOTLB_REG_UMEM: {
@@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
 			goto err;
 
 		dev->vqs[i]->index = i;
+		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
 		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
 		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
 		spin_lock_init(&dev->vqs[i]->kick_lock);
 		spin_lock_init(&dev->vqs[i]->irq_lock);
+		cpumask_setall(&dev->vqs[i]->irq_affinity);
 	}
 
 	return 0;
@@ -1858,12 +1968,15 @@ static int vduse_init(void)
 	if (ret)
 		goto err_cdev;
 
+	ret = -ENOMEM;
 	vduse_irq_wq = alloc_workqueue("vduse-irq",
 				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
-	if (!vduse_irq_wq) {
-		ret = -ENOMEM;
+	if (!vduse_irq_wq)
 		goto err_wq;
-	}
+
+	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+	if (!vduse_irq_bound_wq)
+		goto err_bound_wq;
 
 	ret = vduse_domain_init();
 	if (ret)
@@ -1877,6 +1990,8 @@ static int vduse_init(void)
 err_mgmtdev:
 	vduse_domain_exit();
 err_domain:
+	destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
 	destroy_workqueue(vduse_irq_wq);
 err_wq:
 	cdev_del(&vduse_cdev);
@@ -1896,6 +2011,7 @@ static void vduse_exit(void)
 {
 	vduse_mgmtdev_exit();
 	vduse_domain_exit();
+	destroy_workqueue(vduse_irq_bound_wq);
 	destroy_workqueue(vduse_irq_wq);
 	cdev_del(&vduse_cdev);
 	device_destroy(vduse_class, vduse_major);
-- 
2.20.1
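
For context, a minimal userspace sketch of the path this patch changes,
assuming a VDUSE device fd already set up via /dev/vduse (hypothetical
inject_vq_irq() wrapper, error handling elided; VDUSE_VQ_INJECT_IRQ is
the existing vq interrupt injection ioctl from <linux/vduse.h>):

#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/vduse.h>

/*
 * Ask the kernel to inject an interrupt for the given virtqueue.
 * After this patch, the irq callback no longer always runs on an
 * unbound workqueue: the kernel round-robins it across the online
 * CPUs in the virtqueue's irq callback affinity mask.
 */
static int inject_vq_irq(int dev_fd, __u32 vq_index)
{
	return ioctl(dev_fd, VDUSE_VQ_INJECT_IRQ, &vq_index);
}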


Thread overview: 44+ messages

2023-02-28  9:40 [PATCH v3 00/11] VDUSE: Improve performance Xie Yongji
2023-02-28  9:41 ` [PATCH v3 01/11] lib/group_cpus: Export group_cpus_evenly() Xie Yongji
2023-03-10  8:51   ` Michael S. Tsirkin
2023-03-16  9:31   ` Jason Wang
2023-02-28  9:41 ` [PATCH v3 02/11] vdpa: Add set/get_vq_affinity callbacks in vdpa_config_ops Xie Yongji
2023-03-16  3:27   ` Jason Wang
2023-03-17  7:10     ` Yongji Xie
2023-02-28  9:41 ` [PATCH v3 03/11] vdpa: Add set_irq_affinity callback " Xie Yongji
2023-03-16  4:02   ` Jason Wang
2023-03-17  7:44     ` Yongji Xie
2023-03-20  9:31       ` Jason Wang
2023-03-20 11:17         ` Yongji Xie
2023-02-28  9:41 ` [PATCH v3 04/11] vduse: Refactor allocation for vduse virtqueues Xie Yongji
2023-02-28  9:41 ` [PATCH v3 05/11] vduse: Support automatic irq callback affinity Xie Yongji [this message]
2023-02-28 11:12   ` kernel test robot
2023-03-01  1:18   ` kernel test robot
2023-03-16  9:03   ` Jason Wang
2023-03-17  7:04     ` Yongji Xie
2023-02-28  9:41 ` [PATCH v3 06/11] vduse: Support set/get_vq_affinity callbacks Xie Yongji
2023-02-28  9:41 ` [PATCH v3 07/11] vduse: Add sysfs interface for irq callback affinity Xie Yongji
2023-02-28  9:41 ` [PATCH v3 08/11] vdpa: Add eventfd for the vdpa callback Xie Yongji
2023-03-16  9:25   ` Jason Wang
2023-03-16  9:40     ` Jason Wang
2023-03-17  6:57     ` Yongji Xie
2023-02-28  9:41 ` [PATCH v3 09/11] vduse: Signal interrupt's eventfd directly in vhost-vdpa case Xie Yongji
2023-03-16  9:30   ` Jason Wang
2023-03-17  7:01     ` Yongji Xie
2023-02-28  9:41 ` [PATCH v3 10/11] vduse: Delay iova domain creation Xie Yongji
2023-02-28  9:41 ` [PATCH v3 11/11] vduse: Support specifying bounce buffer size via sysfs Xie Yongji
2023-03-10  8:49 ` [PATCH v3 00/11] VDUSE: Improve performance Michael S. Tsirkin
2023-03-10  9:41   ` Jason Wang
