From: Alex Williamson <alex.williamson@redhat.com>
To: avi@redhat.com, mst@redhat.com
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
jan.kiszka@siemens.com
Subject: [PATCH v2 5/6] kvm: KVM_EOIFD, an eventfd for EOIs
Date: Tue, 26 Jun 2012 23:10:08 -0600 [thread overview]
Message-ID: <20120627050952.23698.37235.stgit@bling.home> (raw)
In-Reply-To: <20120627044758.23698.249.stgit@bling.home>
This new ioctl enables an eventfd to be triggered when an EOI is
written for a specified irqchip pin. By default this is a simple
notification, but we can also tie the eoifd to a level irqfd, which
enables the irqchip pin to be automatically de-asserted on EOI.
This mode is particularly useful for device-assignment applications
where the unmask and notify triggers a hardware unmask. The default
mode is most applicable to simple notify with no side-effects for
userspace usage, such as Qemu.
Here we make use of the reference counting of the _irq_source
object allowing us to share it with an irqfd and cleanup regardless
of the release order.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
Documentation/virtual/kvm/api.txt | 24 +++++
arch/x86/kvm/x86.c | 1
include/linux/kvm.h | 14 +++
include/linux/kvm_host.h | 13 +++
virt/kvm/eventfd.c | 189 +++++++++++++++++++++++++++++++++++++
virt/kvm/kvm_main.c | 11 ++
6 files changed, 250 insertions(+), 2 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index b216709..87a2558 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1987,6 +1987,30 @@ interrupts with those injected through KVM_IRQ_LINE. IRQFDs created
with KVM_IRQFD_FLAG_LEVEL must also set this flag when de-assiging.
KVM_IRQFD_FLAG_LEVEL support is indicated by KVM_CAP_IRQFD_LEVEL.
+4.77 KVM_EOIFD
+
+Capability: KVM_CAP_EOIFD
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_eoifd (in)
+Returns: 0 on success, -1 on error
+
+KVM_EOIFD allows userspace to receive EOI notification through an
+eventfd for level triggered irqchip interrupts. Behavior for edge
+triggered interrupts is undefined. kvm_eoifd.fd specifies the eventfd
+used for notification and kvm_eoifd.gsi specifies the irqchip pin,
+similar to KVM_IRQFD. KVM_EOIFD_FLAG_DEASSIGN is used to deassign
+a previously enabled eoifd and should also set fd and gsi to match.
+
+The KVM_EOIFD_FLAG_LEVEL_IRQFD flag indicates that the EOI is for
+a level triggered irqfd and that the kvm_eoifd structure includes
+kvm_eoifd.irqfd, which must be previously configured using KVM_IRQFD
+with the KVM_IRQFD_FLAG_LEVEL flag. This allows both EOI notification
+through kvm_eoifd.fd as well as automatically de-asserting level
+irqfds on EOI. Both KVM_EOIFD_FLAG_DEASSIGN and
+KVM_EOIFD_FLAG_LEVEL_IRQFD should be used to de-assign an eoifd
+initially setup with KVM_EOIFD_FLAG_LEVEL_IRQFD.
+
5. The kvm_run structure
------------------------
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 80bed07..62d6eca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2149,6 +2149,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PCI_2_3:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_IRQFD_LEVEL:
+ case KVM_CAP_EOIFD:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index b2e6e4f..7567e7d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -619,6 +619,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80
#define KVM_CAP_IRQFD_LEVEL 81
+#define KVM_CAP_EOIFD 82
#ifdef KVM_CAP_IRQ_ROUTING
@@ -694,6 +695,17 @@ struct kvm_irqfd {
__u8 pad[20];
};
+#define KVM_EOIFD_FLAG_DEASSIGN (1 << 0)
+#define KVM_EOIFD_FLAG_LEVEL_IRQFD (1 << 1)
+
+struct kvm_eoifd { /* argument of the KVM_EOIFD vm ioctl */
+ __u32 fd; /* eventfd to signal on EOI */
+ __u32 gsi; /* irqchip pin whose EOI is reported */
+ __u32 flags; /* bitmask of KVM_EOIFD_FLAG_*; any other bit is rejected with -EINVAL */
+ __u32 irqfd; /* fd of a level irqfd; only read when KVM_EOIFD_FLAG_LEVEL_IRQFD is set on assign */
+ __u8 pad[16]; /* not read by the handlers in this patch; presumably reserved - TODO confirm */
+};
+
struct kvm_clock_data {
__u64 clock;
__u32 flags;
@@ -834,6 +846,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
/* Available with KVM_CAP_PPC_ALLOC_HTAB */
#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
+/* Available with KVM_CAP_EOIFD */
+#define KVM_EOIFD _IOW(KVMIO, 0xa8, struct kvm_eoifd)
/*
* ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae3b426..83472eb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,10 @@ struct kvm {
struct list_head items;
} irqfds;
struct list_head ioeventfds;
+ struct {
+ spinlock_t lock;
+ struct list_head items;
+ } eoifds;
#endif
struct kvm_vm_stat stat;
struct kvm_arch arch;
@@ -828,6 +832,8 @@ int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+int kvm_eoifd(struct kvm *kvm, struct kvm_eoifd *args);
+void kvm_eoifd_release(struct kvm *kvm);
#else
@@ -853,6 +859,13 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
return -ENOSYS;
}
+static inline int kvm_eoifd(struct kvm *kvm, struct kvm_eoifd *args)
+{
+ return -ENOSYS; /* stub for the #else side of CONFIG_HAVE_KVM_EVENTFD: eoifd is unavailable */
+}
+
+static inline void kvm_eoifd_release(struct kvm *kvm) {} /* no-op stub for the #else side of CONFIG_HAVE_KVM_EVENTFD */
+
#endif /* CONFIG_HAVE_KVM_EVENTFD */
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 18cc284..02ca50f 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -62,8 +62,7 @@ static void put_irq_source(struct _irq_source *source)
kref_put(&source->kref, release_irq_source);
}
-static struct _irq_source *__attribute__ ((used)) /* white lie for now */
-get_irq_source(struct _irq_source *source)
+static struct _irq_source *get_irq_source(struct _irq_source *source)
{
if (source)
kref_get(&source->kref);
@@ -118,6 +117,41 @@ struct _irqfd {
struct work_struct shutdown;
};
+/*
+ * Find the irqfd on @kvm whose eventfd is the one referred to by @fd and
+ * return its _irq_source with an extra reference taken via get_irq_source().
+ *
+ * Returns:
+ *   - the referenced source on success; the caller releases it with
+ *     put_irq_source();
+ *   - the ERR_PTR produced by eventfd_ctx_fdget() when @fd cannot be
+ *     resolved to an eventfd context;
+ *   - ERR_PTR(-ENODEV) when no irqfd on kvm->irqfds.items uses that
+ *     eventfd, or the matching irqfd has no source attached.
+ */
+static struct _irq_source *get_irq_source_from_irqfd(struct kvm *kvm, int fd)
+{
+	struct eventfd_ctx *eventfd;
+	struct _irqfd *irqfd;
+	struct _irq_source *source = NULL;
+
+	/*
+	 * Resolve the fd straight to its eventfd context, the same way
+	 * kvm_assign_eoifd() does, instead of open-coding fget()/fput()
+	 * around eventfd_ctx_fileget().
+	 */
+	eventfd = eventfd_ctx_fdget(fd);
+	if (IS_ERR(eventfd))
+		return ERR_CAST(eventfd);
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
+		if (irqfd->eventfd == eventfd) {
+			source = get_irq_source(irqfd->source);
+			break;
+		}
+	}
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+
+	/* The context was only needed for the pointer comparison above. */
+	eventfd_ctx_put(eventfd);
+
+	return source ? source : ERR_PTR(-ENODEV);
+}
+
static struct workqueue_struct *irqfd_cleanup_wq;
static void
@@ -375,6 +409,8 @@ kvm_eventfd_init(struct kvm *kvm)
spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->ioeventfds);
+ spin_lock_init(&kvm->eoifds.lock);
+ INIT_LIST_HEAD(&kvm->eoifds.items);
}
/*
@@ -743,3 +779,152 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
return kvm_assign_ioeventfd(kvm, args);
}
+
+/*
+ * --------------------------------------------------------------------
+ * eoifd: Translate KVM APIC/IOAPIC EOI into eventfd signal.
+ *
+ * userspace can register GSIs with an eventfd for receiving
+ * notification when an EOI occurs.
+ * --------------------------------------------------------------------
+ */
+
+struct _eoifd { /* one registered EOI notification, linked on kvm->eoifds.items */
+ struct kvm *kvm; /* VM passed to kvm_set_irq() from eoifd_event() */
+ struct eventfd_ctx *eventfd; /* signalled on EOI; also the key used to reject duplicates */
+ struct _irq_source *source; /* NULL unless assigned with KVM_EOIFD_FLAG_LEVEL_IRQFD; when set, the pin is lowered on EOI */
+ struct kvm_irq_ack_notifier notifier; /* .gsi is the watched pin, .irq_acked is eoifd_event */
+ struct list_head list; /* link on kvm->eoifds.items, manipulated under kvm->eoifds.lock */
+};
+
+/*
+ * irq_acked callback installed by kvm_assign_eoifd().
+ *
+ * When the eoifd is bound to an irq source, the pin is lowered on behalf of
+ * that source first; the eventfd is then signalled in every case.
+ */
+static void eoifd_event(struct kvm_irq_ack_notifier *notifier)
+{
+	struct _eoifd *e = container_of(notifier, struct _eoifd, notifier);
+	struct _irq_source *src = e->source;
+
+	/* @notifier is e->notifier, so its gsi is the pin being acked. */
+	if (src)
+		kvm_set_irq(e->kvm, src->id, notifier->gsi, 0);
+
+	eventfd_signal(e->eventfd, 1);
+}
+
+/*
+ * Register an eoifd described by @args on @kvm.
+ *
+ * With KVM_EOIFD_FLAG_LEVEL_IRQFD the irq source of the irqfd named by
+ * args->irqfd is looked up and referenced, so that eoifd_event() can
+ * de-assert the pin before signalling args->fd.  Without the flag the eoifd
+ * only signals.
+ *
+ * Returns 0 on success, the error from get_irq_source_from_irqfd() or
+ * eventfd_ctx_fdget() if either fd cannot be resolved, -ENOMEM if the
+ * allocation fails, and -EBUSY if args->fd is already registered as an eoifd
+ * on this VM.  On every failure all references taken here are dropped.
+ */
+static int kvm_assign_eoifd(struct kvm *kvm, struct kvm_eoifd *args)
+{
+	struct eventfd_ctx *eventfd;
+	struct _eoifd *eoifd, *tmp;
+	struct _irq_source *source = NULL;
+	int ret;
+
+	if (args->flags & KVM_EOIFD_FLAG_LEVEL_IRQFD) {
+		source = get_irq_source_from_irqfd(kvm, args->irqfd);
+		if (IS_ERR(source))
+			return PTR_ERR(source);
+	}
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd)) {
+		ret = PTR_ERR(eventfd);
+		goto err_put_source;
+	}
+
+	eoifd = kzalloc(sizeof(*eoifd), GFP_KERNEL);
+	if (!eoifd) {
+		ret = -ENOMEM;
+		goto err_put_eventfd;
+	}
+
+	INIT_LIST_HEAD(&eoifd->list);
+	eoifd->kvm = kvm;
+	eoifd->eventfd = eventfd;
+	eoifd->source = source;
+	eoifd->notifier.gsi = args->gsi;
+	eoifd->notifier.irq_acked = eoifd_event;
+
+	spin_lock_irq(&kvm->eoifds.lock);
+
+	/* An eventfd may back at most one eoifd per VM. */
+	list_for_each_entry(tmp, &kvm->eoifds.items, list) {
+		if (tmp->eventfd == eventfd) {
+			/*
+			 * Drop the lock before unwinding: returning with it
+			 * held would leave interrupts disabled and deadlock
+			 * the next user of kvm->eoifds.lock.
+			 */
+			spin_unlock_irq(&kvm->eoifds.lock);
+			ret = -EBUSY;
+			goto err_free;
+		}
+	}
+
+	list_add_tail(&eoifd->list, &kvm->eoifds.items);
+	/*
+	 * NOTE(review): this runs under a spinlock with interrupts off;
+	 * confirm kvm_register_irq_ack_notifier() cannot sleep.
+	 */
+	kvm_register_irq_ack_notifier(kvm, &eoifd->notifier);
+
+	spin_unlock_irq(&kvm->eoifds.lock);
+
+	return 0;
+
+err_free:
+	kfree(eoifd);
+err_put_eventfd:
+	eventfd_ctx_put(eventfd);
+err_put_source:
+	/* source may be NULL here, exactly as in the original error paths. */
+	put_irq_source(source);
+	return ret;
+}
+
+/*
+ * Unlink @eoifd from kvm->eoifds.items, unregister its ack notifier and
+ * release everything it owns: the irq source reference, the eventfd context
+ * and the object itself.  Both callers in this file invoke it with
+ * kvm->eoifds.lock held.
+ *
+ * NOTE(review): the unregister call therefore runs under a spinlock with
+ * interrupts off; confirm kvm_unregister_irq_ack_notifier() cannot sleep.
+ */
+static void eoifd_deactivate(struct kvm *kvm, struct _eoifd *eoifd)
+{
+	struct kvm_irq_ack_notifier *ack = &eoifd->notifier;
+
+	list_del(&eoifd->list);
+
+	/* The notifier is embedded in @eoifd, so unregister before freeing. */
+	kvm_unregister_irq_ack_notifier(kvm, ack);
+
+	put_irq_source(eoifd->source);
+	eventfd_ctx_put(eoifd->eventfd);
+	kfree(eoifd);
+}
+
+/*
+ * Tear down every eoifd still registered on @kvm; called from
+ * kvm_vm_release().  Each eoifd_deactivate() call unlinks the entry it is
+ * given, so the list is drained from the head until it is empty.
+ */
+void kvm_eoifd_release(struct kvm *kvm)
+{
+	struct list_head *head = &kvm->eoifds.items;
+
+	spin_lock_irq(&kvm->eoifds.lock);
+
+	while (!list_empty(head)) {
+		struct _eoifd *first;
+
+		first = list_first_entry(head, struct _eoifd, list);
+		eoifd_deactivate(kvm, first);
+	}
+
+	spin_unlock_irq(&kvm->eoifds.lock);
+}
+
+/*
+ * Remove the eoifd on @kvm that matches @args.
+ *
+ * An entry matches when it uses the eventfd behind args->fd, watches
+ * args->gsi, and was set up with an irq source exactly when
+ * KVM_EOIFD_FLAG_LEVEL_IRQFD is set in args->flags.
+ *
+ * Returns 0 if an entry was removed, -ENODEV if none matched, or the error
+ * from eventfd_ctx_fdget() if args->fd cannot be resolved.
+ */
+static int kvm_deassign_eoifd(struct kvm *kvm, struct kvm_eoifd *args)
+{
+	struct eventfd_ctx *eventfd;
+	struct _eoifd *cur;
+	bool want_source = (args->flags & KVM_EOIFD_FLAG_LEVEL_IRQFD) != 0;
+	int ret = -ENODEV;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	spin_lock_irq(&kvm->eoifds.lock);
+
+	list_for_each_entry(cur, &kvm->eoifds.items, list) {
+		/*
+		 * The eventfd alone identifies the entry since duplicates
+		 * are not allowed; the gsi and source checks only sanitize
+		 * the caller's parameters.
+		 */
+		if (cur->eventfd != eventfd)
+			continue;
+		if (cur->notifier.gsi != args->gsi)
+			continue;
+		if (want_source != (cur->source != NULL))
+			continue;
+
+		/* cur is freed here, so leave the loop without touching it. */
+		eoifd_deactivate(kvm, cur);
+		ret = 0;
+		break;
+	}
+
+	spin_unlock_irq(&kvm->eoifds.lock);
+
+	eventfd_ctx_put(eventfd);
+
+	return ret;
+}
+
+/*
+ * Entry point for the KVM_EOIFD ioctl: reject unknown flag bits with
+ * -EINVAL, then dispatch to the deassign or assign path depending on
+ * KVM_EOIFD_FLAG_DEASSIGN.
+ */
+int kvm_eoifd(struct kvm *kvm, struct kvm_eoifd *args)
+{
+	const __u32 known_flags = KVM_EOIFD_FLAG_DEASSIGN |
+				  KVM_EOIFD_FLAG_LEVEL_IRQFD;
+
+	if (args->flags & ~known_flags)
+		return -EINVAL;
+
+	return (args->flags & KVM_EOIFD_FLAG_DEASSIGN) ?
+		kvm_deassign_eoifd(kvm, args) :
+		kvm_assign_eoifd(kvm, args);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b4ad14cc..5b41df1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -620,6 +620,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
kvm_irqfd_release(kvm);
+ kvm_eoifd_release(kvm);
+
kvm_put_kvm(kvm);
return 0;
}
@@ -2093,6 +2095,15 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
#endif
+ case KVM_EOIFD: {
+ struct kvm_eoifd data;
+
+ r = -EFAULT;
+ if (copy_from_user(&data, argp, sizeof data))
+ goto out;
+ r = kvm_eoifd(kvm, &data);
+ break;
+ }
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
next prev parent reply other threads:[~2012-06-27 5:10 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-27 5:08 [PATCH v2 0/6] kvm: level triggered irqfd support Alex Williamson
2012-06-27 5:09 ` [PATCH v2 1/6] kvm: Pass kvm_irqfd to functions Alex Williamson
2012-06-27 9:35 ` Michael S. Tsirkin
2012-06-27 14:30 ` Alex Williamson
2012-06-27 14:24 ` Cornelia Huck
2012-06-28 8:38 ` Michael S. Tsirkin
2012-06-28 9:03 ` Cornelia Huck
2012-06-28 9:34 ` Michael S. Tsirkin
2012-06-28 12:00 ` Cornelia Huck
2012-06-28 12:09 ` Michael S. Tsirkin
2012-06-28 16:51 ` Cornelia Huck
2012-06-28 16:56 ` Michael S. Tsirkin
2012-06-29 15:14 ` Alex Williamson
2012-06-27 5:09 ` [PATCH v2 2/6] kvm: Add missing KVM_IRQFD API documentation Alex Williamson
2012-06-27 9:53 ` Michael S. Tsirkin
2012-06-27 5:09 ` [PATCH v2 3/6] kvm: Sanitize KVM_IRQFD flags Alex Williamson
2012-06-27 9:21 ` Michael S. Tsirkin
2012-06-27 20:12 ` Alex Williamson
2012-06-27 20:22 ` Michael S. Tsirkin
2012-06-28 12:35 ` Avi Kivity
2012-06-27 5:09 ` [PATCH v2 4/6] kvm: Extend irqfd to support level interrupts Alex Williamson
2012-06-27 9:34 ` Michael S. Tsirkin
2012-06-27 21:19 ` Alex Williamson
2012-06-28 12:41 ` Avi Kivity
2012-06-27 9:51 ` Michael S. Tsirkin
2012-06-27 20:59 ` Alex Williamson
2012-06-27 21:14 ` Michael S. Tsirkin
2012-06-27 21:28 ` Alex Williamson
2012-06-27 22:28 ` Michael S. Tsirkin
2012-06-28 3:52 ` Alex Williamson
2012-06-28 8:29 ` Michael S. Tsirkin
2012-06-29 15:13 ` Alex Williamson
2012-06-27 15:26 ` Michael S. Tsirkin
2012-06-27 22:04 ` Alex Williamson
2012-06-27 22:31 ` Michael S. Tsirkin
2012-06-28 6:34 ` Gleb Natapov
2012-06-28 8:34 ` Michael S. Tsirkin
2012-06-28 8:35 ` Gleb Natapov
2012-06-28 8:41 ` Michael S. Tsirkin
2012-06-28 8:46 ` Gleb Natapov
2012-06-28 8:48 ` Michael S. Tsirkin
2012-06-28 8:53 ` Gleb Natapov
2012-06-29 22:27 ` Alex Williamson
2012-07-01 7:34 ` Gleb Natapov
2012-06-27 5:10 ` Alex Williamson [this message]
2012-06-27 9:49 ` [PATCH v2 5/6] kvm: KVM_EOIFD, an eventfd for EOIs Michael S. Tsirkin
2012-06-27 13:58 ` Gleb Natapov
2012-06-27 14:29 ` Alex Williamson
2012-06-27 14:51 ` Gleb Natapov
2012-06-28 3:55 ` Alex Williamson
2012-06-28 13:11 ` Michael S. Tsirkin
2012-06-28 14:08 ` Gleb Natapov
2012-06-28 16:55 ` Michael S. Tsirkin
2012-06-27 15:20 ` Michael S. Tsirkin
2012-06-28 19:29 ` Michael S. Tsirkin
2012-06-29 15:09 ` Alex Williamson
2012-06-29 15:12 ` Alex Williamson
2012-06-27 5:10 ` [PATCH v2 6/6] kvm: Level IRQ de-assert for KVM_IRQFD Alex Williamson
2012-06-28 12:59 ` Avi Kivity
2012-06-29 15:39 ` Alex Williamson
2012-06-27 9:15 ` [PATCH v2 0/6] kvm: level triggered irqfd support Michael S. Tsirkin
2012-06-27 9:58 ` Michael S. Tsirkin
2012-06-27 14:33 ` Alex Williamson
2012-06-28 8:42 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120627050952.23698.37235.stgit@bling.home \
--to=alex.williamson@redhat.com \
--cc=avi@redhat.com \
--cc=jan.kiszka@siemens.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mst@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).