* [PATCH v1] KVM: X86: Introduce vfio_intr_stat per-vm debugfs file
@ 2022-01-19 11:50 Yuan ZhaoXiong
  2022-02-07 18:59 ` Sean Christopherson
  0 siblings, 1 reply; 3+ messages in thread
From: Yuan ZhaoXiong @ 2022-01-19 11:50 UTC (permalink / raw)
  To: pbonzini, seanjc, vkuznets, wanpengli, jmattson, joro, tglx,
	mingo, bp, dave.hansen, hpa
  Cc: lirongqing, kvm, x86, linux-kernel

Introduce a per-VM debugfs file that exports the correspondence between
guest_irq, host_irq, vector and vcpu for VFIO passthrough devices.

Example output (for a VM with VFIO passthrough devices):
   guest_irq     host_irq       vector         vcpu
          24          201           37            8
          25          202           35           25
          26          203           35           20
   ......

When a VM has VFIO passthrough devices, the correspondence between
guest_irq, host_irq, vector and vcpu may need to be known, especially
on AMD platforms with AVIC disabled.  With AVIC disabled, a
passthrough device interrupt causes two VM exits: one for the external
interrupt raised by the VFIO host irq, and one for the IPI used to
inject the interrupt into the VM.

If the system administrator knows this mapping, the VFIO host irq
affinity can be set to the pCPU running the vCPU that the
corresponding guest irq is affined to, avoiding the extra VM exit.

Co-developed-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Yuan ZhaoXiong <yuanzhaoxiong@baidu.com>
---
Changes since v0: code formatting fixes.

 arch/x86/kvm/debugfs.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 9240b3b..be16bfe 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -10,6 +10,11 @@
 #include "mmu.h"
 #include "mmu/mmu_internal.h"
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#include <linux/kvm_irqfd.h>
+#include <asm/irq_remapping.h>
+#endif
+
 static int vcpu_get_timer_advance_ns(void *data, u64 *val)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
@@ -181,9 +186,94 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
 	.release	= kvm_mmu_rmaps_stat_release,
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+static int kvm_vfio_intr_stat_show(struct seq_file *m, void *v)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct kvm_irq_routing_table *irq_rt;
+	unsigned int host_irq, guest_irq;
+	struct kvm_kernel_irqfd *irqfd;
+	struct kvm *kvm = m->private;
+	struct kvm_lapic_irq irq;
+	struct kvm_vcpu *vcpu;
+	int idx;
+
+	if (!kvm_arch_has_assigned_device(kvm) ||
+			!irq_remapping_cap(IRQ_POSTING_CAP)) {
+		return 0;
+	}
+
+	seq_printf(m, "%12s %12s %12s %12s\n",
+			"guest_irq", "host_irq", "vector", "vcpu");
+
+	spin_lock_irq(&kvm->irqfds.lock);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+
+	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
+		if (!irqfd->producer)
+			continue;
+
+		host_irq = irqfd->producer->irq;
+		guest_irq = irqfd->gsi;
+
+		if (guest_irq >= irq_rt->nr_rt_entries ||
+				hlist_empty(&irq_rt->map[guest_irq])) {
+			pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+					guest_irq, irq_rt->nr_rt_entries);
+			continue;
+		}
+
+		hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+			if (e->type != KVM_IRQ_ROUTING_MSI)
+				continue;
+
+			kvm_set_msi_irq(kvm, e, &irq);
+			if (kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+				seq_printf(m, "%12u %12u %12u %12u\n",
+						guest_irq, host_irq, irq.vector, vcpu->vcpu_id);
+			}
+		}
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+	spin_unlock_irq(&kvm->irqfds.lock);
+	return 0;
+}
+
+static int kvm_vfio_intr_stat_open(struct inode *inode, struct file *file)
+{
+	struct kvm *kvm = inode->i_private;
+
+	if (!kvm_get_kvm_safe(kvm))
+		return -ENOENT;
+
+	return single_open(file, kvm_vfio_intr_stat_show, kvm);
+}
+
+static int kvm_vfio_intr_stat_release(struct inode *inode, struct file *file)
+{
+	struct kvm *kvm = inode->i_private;
+
+	kvm_put_kvm(kvm);
+	return single_release(inode, file);
+}
+
+static const struct file_operations vfio_intr_stat_fops = {
+	.open    = kvm_vfio_intr_stat_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = kvm_vfio_intr_stat_release,
+};
+#endif
+
 int kvm_arch_create_vm_debugfs(struct kvm *kvm)
 {
 	debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm,
 			    &mmu_rmaps_stat_fops);
+
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+	debugfs_create_file("vfio_intr_stat", 0444, kvm->debugfs_dentry, kvm,
+			    &vfio_intr_stat_fops);
+#endif
 	return 0;
 }
-- 
1.8.3.1



* Re: [PATCH v1] KVM: X86: Introduce vfio_intr_stat per-vm debugfs file
  2022-01-19 11:50 [PATCH v1] KVM: X86: Introduce vfio_intr_stat per-vm debugfs file Yuan ZhaoXiong
@ 2022-02-07 18:59 ` Sean Christopherson
  0 siblings, 0 replies; 3+ messages in thread
From: Sean Christopherson @ 2022-02-07 18:59 UTC (permalink / raw)
  To: Yuan ZhaoXiong
  Cc: pbonzini, vkuznets, wanpengli, jmattson, joro, tglx, mingo, bp,
	dave.hansen, hpa, lirongqing, kvm, x86, linux-kernel

On Wed, Jan 19, 2022, Yuan ZhaoXiong wrote:
> +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS

This is pointless, KVM x86 unconditionally selects HAVE_KVM_IRQ_BYPASS.

> +#include <linux/kvm_irqfd.h>
> +#include <asm/irq_remapping.h>
> +#endif
> +
>  static int vcpu_get_timer_advance_ns(void *data, u64 *val)
>  {
>  	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
> @@ -181,9 +186,94 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
>  	.release	= kvm_mmu_rmaps_stat_release,
>  };
>  
> +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
> +static int kvm_vfio_intr_stat_show(struct seq_file *m, void *v)
> +{
> +	struct kvm_kernel_irq_routing_entry *e;
> +	struct kvm_irq_routing_table *irq_rt;
> +	unsigned int host_irq, guest_irq;
> +	struct kvm_kernel_irqfd *irqfd;
> +	struct kvm *kvm = m->private;
> +	struct kvm_lapic_irq irq;
> +	struct kvm_vcpu *vcpu;
> +	int idx;
> +
> +	if (!kvm_arch_has_assigned_device(kvm) ||
> +			!irq_remapping_cap(IRQ_POSTING_CAP)) {

Bad indentation and unnecessary curly braces.

	if (!kvm_arch_has_assigned_device(kvm) ||
	    !irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;


> +		return 0;
> +	}
> +
> +	seq_printf(m, "%12s %12s %12s %12s\n",
> +			"guest_irq", "host_irq", "vector", "vcpu");

Bad indentation.  Ditto for many cases below.


	seq_printf(m, "%12s %12s %12s %12s\n",
		   "guest_irq", "host_irq", "vector", "vcpu");
	   
> +
> +	spin_lock_irq(&kvm->irqfds.lock);
> +	idx = srcu_read_lock(&kvm->irq_srcu);
> +	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> +
> +	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
> +		if (!irqfd->producer)
> +			continue;
> +
> +		host_irq = irqfd->producer->irq;
> +		guest_irq = irqfd->gsi;
> +
> +		if (guest_irq >= irq_rt->nr_rt_entries ||
> +				hlist_empty(&irq_rt->map[guest_irq])) {

Indentation.

> +			pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
> +					guest_irq, irq_rt->nr_rt_entries);

Indentation, though I personally don't see much point of duplicating the message
from vmx_pi_update_irte(), just continue on.
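
E.g. an untested sketch of what I have in mind, just drop the warning:

	if (guest_irq >= irq_rt->nr_rt_entries ||
	    hlist_empty(&irq_rt->map[guest_irq]))
		continue;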

> +			continue;
> +		}
> +
> +		hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> +			if (e->type != KVM_IRQ_ROUTING_MSI)
> +				continue;
> +
> +			kvm_set_msi_irq(kvm, e, &irq);
> +			if (kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {

Unnecessary curly braces (though this one is debatable).

> +				seq_printf(m, "%12u %12u %12u %12u\n",
> +						guest_irq, host_irq, irq.vector, vcpu->vcpu_id);

Indentation.

> +			}
> +		}
> +	}
> +	srcu_read_unlock(&kvm->irq_srcu, idx);
> +	spin_unlock_irq(&kvm->irqfds.lock);
> +	return 0;
> +}
> +


