All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] kvm: fast-path msi injection with irqfd
@ 2010-11-17 22:12 Michael S. Tsirkin
  2010-11-18  9:05 ` Gleb Natapov
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-17 22:12 UTC (permalink / raw)
  Cc: Avi Kivity, Marcelo Tosatti, Gleb Natapov, Xiao Guangrong,
	Michael S. Tsirkin, Gregory Haskins, Chris Lalancette, kvm,
	linux-kernel

Store irq routing table pointer in the irqfd object,
and use that to inject MSI directly without bouncing out to
a kernel thread.

While we touch this structure, rearrange irqfd fields to make fastpath
better packed for better cache utilization.

Some notes on the design:
- Use pointer into the rt instead of copying an entry,
  to make it possible to use rcu, thus side-stepping
  locking complexities.  We also save some memory this way.
- Old workqueue code is still used for level irqs.
  I don't think we DTRT with level anyway; however,
  it seems easier to keep that code around, since
  it has been thought through and debugged, and fix level later,
  rather than ripping it out now and re-instating it later.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

The below is compile tested only.  Sending out for early
flames/feedback.  Please review!

 include/linux/kvm_host.h |    4 ++
 virt/kvm/eventfd.c       |   81 +++++++++++++++++++++++++++++++++++++++------
 virt/kvm/irq_comm.c      |    6 ++-
 3 files changed, 78 insertions(+), 13 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a055742..b6f7047 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -462,6 +462,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
+		int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
@@ -603,6 +605,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
+void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
@@ -614,6 +617,7 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
 +static inline void kvm_irqfd_update(struct kvm *kvm,
 +				    struct kvm_irq_routing_table *irq_rt) {}
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	return -ENOSYS;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c..49c1864 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -44,14 +44,18 @@
  */
 
 struct _irqfd {
-	struct kvm               *kvm;
-	struct eventfd_ctx       *eventfd;
-	int                       gsi;
-	struct list_head          list;
-	poll_table                pt;
-	wait_queue_t              wait;
-	struct work_struct        inject;
-	struct work_struct        shutdown;
+	/* Used for MSI fast-path */
+	struct kvm *kvm;
+	wait_queue_t wait;
+	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+	/* Used for level IRQ fast-path */
+	int gsi;
+	struct work_struct inject;
+	/* Used for setup/shutdown */
+	struct eventfd_ctx *eventfd;
+	struct list_head list;
+	poll_table pt;
+	struct work_struct shutdown;
 };
 
 static struct workqueue_struct *irqfd_cleanup_wq;
@@ -125,10 +129,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
 	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
 	unsigned long flags = (unsigned long)key;
+	struct kvm_kernel_irq_routing_entry *irq;
 
-	if (flags & POLLIN)
+	if (flags & POLLIN) {
+		rcu_read_lock();
+		irq = irqfd->irq_entry;
 		/* An event has been signaled, inject an interrupt */
-		schedule_work(&irqfd->inject);
+		if (irq)
+			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+		else
+			schedule_work(&irqfd->inject);
+		rcu_read_unlock();
+	}
 
 	if (flags & POLLHUP) {
 		/* The eventfd is closing, detach from KVM */
@@ -166,6 +178,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
 	struct eventfd_ctx *eventfd = NULL;
@@ -215,6 +228,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 		goto fail;
 	}
 
+	rcu_read_lock();
+	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
+	rcu_read_unlock();
+
 	events = file->f_op->poll(file, &irqfd->pt);
 
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
@@ -271,8 +288,15 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
 	spin_lock_irq(&kvm->irqfds.lock);
 
 	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
-		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
+		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
 +			/* This rcu_assign_pointer is needed for when
 +			 * another thread calls kvm_irqfd_update before
 +			 * we flush the workqueue below.
 +			 * It is paired with the synchronize_rcu done by the
 +			 * caller of that function. */
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
 			irqfd_deactivate(irqfd);
+		}
 	}
 
 	spin_unlock_irq(&kvm->irqfds.lock);
@@ -321,6 +345,41 @@ kvm_irqfd_release(struct kvm *kvm)
 
 }
 
+/* Must be called under irqfds.lock */
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
+			 struct kvm_irq_routing_table *irq_rt)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct hlist_node *n;
+
+	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
+		rcu_assign_pointer(irqfd->irq_entry, NULL);
+		return;
+	}
+
+	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
+		/* Only fast-path MSI. */
+		if (e->type == KVM_IRQ_ROUTING_MSI)
+			rcu_assign_pointer(irqfd->irq_entry, e);
+		else
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
+	}
+}
+
+/* Update irqfd after a routing change.  Caller must invoke synchronize_rcu
+ * afterwards. */
+void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt)
+{
+	struct _irqfd *irqfd;
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+		irqfd_update(kvm, irqfd, irq_rt);
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+}
+
 /*
  * create a host-wide workqueue for issuing deferred shutdown requests
  * aggregated from all vm* instances. We need our own isolated single-thread
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 8edca91..265ab72 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 }
 
-static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		       struct kvm *kvm, int irq_source_id, int level)
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id, int level)
 {
 	struct kvm_lapic_irq irq;
 
@@ -410,7 +410,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
 	rcu_assign_pointer(kvm->irq_routing, new);
+	kvm_irqfd_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
+
 	synchronize_rcu();
 
 	new = old;
-- 
1.7.3.2.91.g446ac

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-17 22:12 [PATCH RFC] kvm: fast-path msi injection with irqfd Michael S. Tsirkin
@ 2010-11-18  9:05 ` Gleb Natapov
  2010-11-18  9:16   ` Michael S. Tsirkin
  2010-11-18  9:34   ` Michael S. Tsirkin
  2010-11-18  9:55 ` Avi Kivity
  2010-11-18 10:57   ` Michael S. Tsirkin
  2 siblings, 2 replies; 22+ messages in thread
From: Gleb Natapov @ 2010-11-18  9:05 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 12:12:54AM +0200, Michael S. Tsirkin wrote:
> Store irq routing table pointer in the irqfd object,
> and use that to inject MSI directly without bouncing out to
> a kernel thread.
> 
> While we touch this structure, rearrange irqfd fields to make fastpath
> better packed for better cache utilization.
> 
> Some notes on the design:
> - Use pointer into the rt instead of copying an entry,
>   to make it possible to use rcu, thus side-stepping
>   locking complexities.  We also save some memory this way.
What locking complexity is there with copying entry approach?

> - Old workqueue code is still used for level irqs.
>   I don't think we DTRT with level anyway, however,
>   it seems easier to keep the code around as
>   it has been thought through and debugged, and fix level later than
>   rip out and re-instate it later.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> The below is compile tested only.  Sending out for early
> flames/feedback.  Please review!
> 
>  include/linux/kvm_host.h |    4 ++
>  virt/kvm/eventfd.c       |   81 +++++++++++++++++++++++++++++++++++++++------
>  virt/kvm/irq_comm.c      |    6 ++-
>  3 files changed, 78 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a055742..b6f7047 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -462,6 +462,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
>  				   unsigned long *deliver_bitmask);
>  #endif
>  int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
> +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> +		int irq_source_id, int level);
>  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>  				   struct kvm_irq_ack_notifier *kian);
> @@ -603,6 +605,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
>  void kvm_eventfd_init(struct kvm *kvm);
>  int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
>  void kvm_irqfd_release(struct kvm *kvm);
> +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt);
>  int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
>  
>  #else
> @@ -614,6 +617,7 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
>  }
>  
>  static inline void kvm_irqfd_release(struct kvm *kvm) {}
> +static inline void kvm_irqfd_update(struct kvm *kvm) {}
>  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  {
>  	return -ENOSYS;
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index c1f1e3c..49c1864 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -44,14 +44,18 @@
>   */
>  
>  struct _irqfd {
> -	struct kvm               *kvm;
> -	struct eventfd_ctx       *eventfd;
> -	int                       gsi;
> -	struct list_head          list;
> -	poll_table                pt;
> -	wait_queue_t              wait;
> -	struct work_struct        inject;
> -	struct work_struct        shutdown;
> +	/* Used for MSI fast-path */
> +	struct kvm *kvm;
> +	wait_queue_t wait;
> +	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
> +	/* Used for level IRQ fast-path */
> +	int gsi;
> +	struct work_struct inject;
> +	/* Used for setup/shutdown */
> +	struct eventfd_ctx *eventfd;
> +	struct list_head list;
> +	poll_table pt;
> +	struct work_struct shutdown;
>  };
>  
>  static struct workqueue_struct *irqfd_cleanup_wq;
> @@ -125,10 +129,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
>  {
>  	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
>  	unsigned long flags = (unsigned long)key;
> +	struct kvm_kernel_irq_routing_entry *irq;
>  
> -	if (flags & POLLIN)
> +	if (flags & POLLIN) {
> +		rcu_read_lock();
> +		irq = irqfd->irq_entry;
Why not rcu_dereference()? And why it can't be zero here?

>  		/* An event has been signaled, inject an interrupt */
> -		schedule_work(&irqfd->inject);
> +		if (irq)
> +			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> +		else
> +			schedule_work(&irqfd->inject);
> +		rcu_read_unlock();
> +	}
>  
>  	if (flags & POLLHUP) {
>  		/* The eventfd is closing, detach from KVM */
> @@ -166,6 +178,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
>  static int
>  kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
>  {
> +	struct kvm_irq_routing_table *irq_rt;
>  	struct _irqfd *irqfd, *tmp;
>  	struct file *file = NULL;
>  	struct eventfd_ctx *eventfd = NULL;
> @@ -215,6 +228,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
>  		goto fail;
>  	}
>  
> +	rcu_read_lock();
> +	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
> +	rcu_read_unlock();
> +
>  	events = file->f_op->poll(file, &irqfd->pt);
>  
>  	list_add_tail(&irqfd->list, &kvm->irqfds.items);
> @@ -271,8 +288,15 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
>  	spin_lock_irq(&kvm->irqfds.lock);
>  
>  	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
> -		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
> +		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
> +			/* This rcu_assign_pointer is needed for when
> +			 * another thread calls kvm_irqfd_update before
> +			 * we flush workqueue below.
> +			 * It is paired with synchronize_rcu done by caller
> +			 * of that function. */
> +			rcu_assign_pointer(irqfd->irq_entry, NULL);
>  			irqfd_deactivate(irqfd);
> +		}
>  	}
>  
>  	spin_unlock_irq(&kvm->irqfds.lock);
> @@ -321,6 +345,41 @@ kvm_irqfd_release(struct kvm *kvm)
>  
>  }
>  
> +/* Must be called under irqfds.lock */
> +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
> +			 struct kvm_irq_routing_table *irq_rt)
> +{
> +	struct kvm_kernel_irq_routing_entry *e;
> +	struct hlist_node *n;
> +
> +	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
> +		rcu_assign_pointer(irqfd->irq_entry, NULL);
> +		return;
> +	}
> +
> +	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
> +		/* Only fast-path MSI. */
> +		if (e->type == KVM_IRQ_ROUTING_MSI)
> +			rcu_assign_pointer(irqfd->irq_entry, e);
> +		else
> +			rcu_assign_pointer(irqfd->irq_entry, NULL);
> +	}
Shouldn't we flush workqueue if routing entry is gone?

> +}
> +
> +/* Update irqfd after a routing change.  Caller must invoke synchronize_rcu
> + * afterwards. */
> +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt)
> +{
> +	struct _irqfd *irqfd;
> +
> +	spin_lock_irq(&kvm->irqfds.lock);
> +
> +	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
> +		irqfd_update(kvm, irqfd, irq_rt);
> +
> +	spin_unlock_irq(&kvm->irqfds.lock);
> +}
> +
>  /*
>   * create a host-wide workqueue for issuing deferred shutdown requests
>   * aggregated from all vm* instances. We need our own isolated single-thread
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 8edca91..265ab72 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>  	return r;
>  }
>  
> -static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> -		       struct kvm *kvm, int irq_source_id, int level)
> +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> +		struct kvm *kvm, int irq_source_id, int level)
>  {
>  	struct kvm_lapic_irq irq;
>  
> @@ -410,7 +410,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  	mutex_lock(&kvm->irq_lock);
>  	old = kvm->irq_routing;
>  	rcu_assign_pointer(kvm->irq_routing, new);
> +	kvm_irqfd_update(kvm, new);
>  	mutex_unlock(&kvm->irq_lock);
> +
>  	synchronize_rcu();
>  
>  	new = old;
> -- 
> 1.7.3.2.91.g446ac

--
			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18  9:05 ` Gleb Natapov
@ 2010-11-18  9:16   ` Michael S. Tsirkin
  2010-11-18  9:20     ` Gleb Natapov
  2010-11-18  9:34   ` Michael S. Tsirkin
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18  9:16 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 11:05:22AM +0200, Gleb Natapov wrote:
> On Thu, Nov 18, 2010 at 12:12:54AM +0200, Michael S. Tsirkin wrote:
> > Store irq routing table pointer in the irqfd object,
> > and use that to inject MSI directly without bouncing out to
> > a kernel thread.
> > 
> > While we touch this structure, rearrange irqfd fields to make fastpath
> > better packed for better cache utilization.
> > 
> > Some notes on the design:
> > - Use pointer into the rt instead of copying an entry,
> >   to make it possible to use rcu, thus side-stepping
> >   locking complexities.  We also save some memory this way.
> What locking complexity is there with copying entry approach?

Without RCU, we need two locks:
	- irqfd lock to scan the list of irqfds
	- eventfd wqh lock in the irqfd to update the entry
To update all irqfds on list, wqh lock would be nested within irqfd lock.
	lock(kvm->irqfds.lock)
	list_for_each(irqfd, kvm->irqfds.list)
		lock(irqfd->wqh)
		update(irqfd)
		unlock(irqfd->wqh)
	unlock(kvm->irqfds.lock)
Problem is, irqfd is nested within wqh for cleanup (POLLHUP) path.

With RCU we do assign and let sync take care of flushing old entries out.

> > - Old workqueue code is still used for level irqs.
> >   I don't think we DTRT with level anyway, however,
> >   it seems easier to keep the code around as
> >   it has been thought through and debugged, and fix level later than
> >   rip out and re-instate it later.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > 
> > The below is compile tested only.  Sending out for early
> > flames/feedback.  Please review!
> > 
> >  include/linux/kvm_host.h |    4 ++
> >  virt/kvm/eventfd.c       |   81 +++++++++++++++++++++++++++++++++++++++------
> >  virt/kvm/irq_comm.c      |    6 ++-
> >  3 files changed, 78 insertions(+), 13 deletions(-)
> > 
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index a055742..b6f7047 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -462,6 +462,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
> >  				   unsigned long *deliver_bitmask);
> >  #endif
> >  int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
> > +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> > +		int irq_source_id, int level);
> >  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
> >  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >  				   struct kvm_irq_ack_notifier *kian);
> > @@ -603,6 +605,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
> >  void kvm_eventfd_init(struct kvm *kvm);
> >  int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
> >  void kvm_irqfd_release(struct kvm *kvm);
> > +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt);
> >  int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
> >  
> >  #else
> > @@ -614,6 +617,7 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> >  }
> >  
> >  static inline void kvm_irqfd_release(struct kvm *kvm) {}
> > +static inline void kvm_irqfd_update(struct kvm *kvm) {}
> >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> >  {
> >  	return -ENOSYS;
> > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > index c1f1e3c..49c1864 100644
> > --- a/virt/kvm/eventfd.c
> > +++ b/virt/kvm/eventfd.c
> > @@ -44,14 +44,18 @@
> >   */
> >  
> >  struct _irqfd {
> > -	struct kvm               *kvm;
> > -	struct eventfd_ctx       *eventfd;
> > -	int                       gsi;
> > -	struct list_head          list;
> > -	poll_table                pt;
> > -	wait_queue_t              wait;
> > -	struct work_struct        inject;
> > -	struct work_struct        shutdown;
> > +	/* Used for MSI fast-path */
> > +	struct kvm *kvm;
> > +	wait_queue_t wait;
> > +	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
> > +	/* Used for level IRQ fast-path */
> > +	int gsi;
> > +	struct work_struct inject;
> > +	/* Used for setup/shutdown */
> > +	struct eventfd_ctx *eventfd;
> > +	struct list_head list;
> > +	poll_table pt;
> > +	struct work_struct shutdown;
> >  };
> >  
> >  static struct workqueue_struct *irqfd_cleanup_wq;
> > @@ -125,10 +129,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
> >  {
> >  	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
> >  	unsigned long flags = (unsigned long)key;
> > +	struct kvm_kernel_irq_routing_entry *irq;
> >  
> > -	if (flags & POLLIN)
> > +	if (flags & POLLIN) {
> > +		rcu_read_lock();
> > +		irq = irqfd->irq_entry;
> Why not rcu_dereference()? And why it can't be zero here?
> 
> >  		/* An event has been signaled, inject an interrupt */
> > -		schedule_work(&irqfd->inject);
> > +		if (irq)
> > +			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> > +		else
> > +			schedule_work(&irqfd->inject);
> > +		rcu_read_unlock();
> > +	}
> >  
> >  	if (flags & POLLHUP) {
> >  		/* The eventfd is closing, detach from KVM */
> > @@ -166,6 +178,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
> >  static int
> >  kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> >  {
> > +	struct kvm_irq_routing_table *irq_rt;
> >  	struct _irqfd *irqfd, *tmp;
> >  	struct file *file = NULL;
> >  	struct eventfd_ctx *eventfd = NULL;
> > @@ -215,6 +228,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> >  		goto fail;
> >  	}
> >  
> > +	rcu_read_lock();
> > +	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
> > +	rcu_read_unlock();
> > +
> >  	events = file->f_op->poll(file, &irqfd->pt);
> >  
> >  	list_add_tail(&irqfd->list, &kvm->irqfds.items);
> > @@ -271,8 +288,15 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> >  	spin_lock_irq(&kvm->irqfds.lock);
> >  
> >  	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
> > -		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
> > +		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
> > +			/* This rcu_assign_pointer is needed for when
> > +			 * another thread calls kvm_irqfd_update before
> > +			 * we flush workqueue below.
> > +			 * It is paired with synchronize_rcu done by caller
> > +			 * of that function. */
> > +			rcu_assign_pointer(irqfd->irq_entry, NULL);
> >  			irqfd_deactivate(irqfd);
> > +		}
> >  	}
> >  
> >  	spin_unlock_irq(&kvm->irqfds.lock);
> > @@ -321,6 +345,41 @@ kvm_irqfd_release(struct kvm *kvm)
> >  
> >  }
> >  
> > +/* Must be called under irqfds.lock */
> > +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
> > +			 struct kvm_irq_routing_table *irq_rt)
> > +{
> > +	struct kvm_kernel_irq_routing_entry *e;
> > +	struct hlist_node *n;
> > +
> > +	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
> > +		rcu_assign_pointer(irqfd->irq_entry, NULL);
> > +		return;
> > +	}
> > +
> > +	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
> > +		/* Only fast-path MSI. */
> > +		if (e->type == KVM_IRQ_ROUTING_MSI)
> > +			rcu_assign_pointer(irqfd->irq_entry, e);
> > +		else
> > +			rcu_assign_pointer(irqfd->irq_entry, NULL);
> > +	}
> Shouldn't we flush workqueue if routing entry is gone?
> 
> > +}
> > +
> > +/* Update irqfd after a routing change.  Caller must invoke synchronize_rcu
> > + * afterwards. */
> > +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt)
> > +{
> > +	struct _irqfd *irqfd;
> > +
> > +	spin_lock_irq(&kvm->irqfds.lock);
> > +
> > +	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
> > +		irqfd_update(kvm, irqfd, irq_rt);
> > +
> > +	spin_unlock_irq(&kvm->irqfds.lock);
> > +}
> > +
> >  /*
> >   * create a host-wide workqueue for issuing deferred shutdown requests
> >   * aggregated from all vm* instances. We need our own isolated single-thread
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 8edca91..265ab72 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> >  	return r;
> >  }
> >  
> > -static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> > -		       struct kvm *kvm, int irq_source_id, int level)
> > +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> > +		struct kvm *kvm, int irq_source_id, int level)
> >  {
> >  	struct kvm_lapic_irq irq;
> >  
> > @@ -410,7 +410,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >  	mutex_lock(&kvm->irq_lock);
> >  	old = kvm->irq_routing;
> >  	rcu_assign_pointer(kvm->irq_routing, new);
> > +	kvm_irqfd_update(kvm, new);
> >  	mutex_unlock(&kvm->irq_lock);
> > +
> >  	synchronize_rcu();
> >  
> >  	new = old;
> > -- 
> > 1.7.3.2.91.g446ac
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18  9:16   ` Michael S. Tsirkin
@ 2010-11-18  9:20     ` Gleb Natapov
  0 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2010-11-18  9:20 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 11:16:02AM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 18, 2010 at 11:05:22AM +0200, Gleb Natapov wrote:
> > On Thu, Nov 18, 2010 at 12:12:54AM +0200, Michael S. Tsirkin wrote:
> > > Store irq routing table pointer in the irqfd object,
> > > and use that to inject MSI directly without bouncing out to
> > > a kernel thread.
> > > 
> > > While we touch this structure, rearrange irqfd fields to make fastpath
> > > better packed for better cache utilization.
> > > 
> > > Some notes on the design:
> > > - Use pointer into the rt instead of copying an entry,
> > >   to make it possible to use rcu, thus side-stepping
> > >   locking complexities.  We also save some memory this way.
> > What locking complexity is there with copying entry approach?
> 
> Without RCU, we need two locks:
> 	- irqfd lock to scan the list of irqfds
> 	- eventfd wqh lock in the irqfd to update the entry
> To update all irqfds on list, wqh lock would be nested within irqfd lock.
> 	lock(kvm->irqfds.lock)
> 	list_for_each(irqfd, kvm->irqfds.list)
> 		lock(irqfd->wqh)
> 		update(irqfd)
> 		unlock(irqfd->wqh)
> 	unlock(kvm->irqfds.lock)
> Problem is, irqfd is nested within wqh for cleanup (POLLHUP) path.
> 
> With RCU we do assign and let sync take care of flushing old entries out.
> 
Make sense. What about other comments :)

> > > - Old workqueue code is still used for level irqs.
> > >   I don't think we DTRT with level anyway, however,
> > >   it seems easier to keep the code around as
> > >   it has been thought through and debugged, and fix level later than
> > >   rip out and re-instate it later.
> > > 
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > > 
> > > The below is compile tested only.  Sending out for early
> > > flames/feedback.  Please review!
> > > 
> > >  include/linux/kvm_host.h |    4 ++
> > >  virt/kvm/eventfd.c       |   81 +++++++++++++++++++++++++++++++++++++++------
> > >  virt/kvm/irq_comm.c      |    6 ++-
> > >  3 files changed, 78 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index a055742..b6f7047 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -462,6 +462,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
> > >  				   unsigned long *deliver_bitmask);
> > >  #endif
> > >  int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
> > > +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> > > +		int irq_source_id, int level);
> > >  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
> > >  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> > >  				   struct kvm_irq_ack_notifier *kian);
> > > @@ -603,6 +605,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
> > >  void kvm_eventfd_init(struct kvm *kvm);
> > >  int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
> > >  void kvm_irqfd_release(struct kvm *kvm);
> > > +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt);
> > >  int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
> > >  
> > >  #else
> > > @@ -614,6 +617,7 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> > >  }
> > >  
> > >  static inline void kvm_irqfd_release(struct kvm *kvm) {}
> > > +static inline void kvm_irqfd_update(struct kvm *kvm) {}
> > >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> > >  {
> > >  	return -ENOSYS;
> > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > > index c1f1e3c..49c1864 100644
> > > --- a/virt/kvm/eventfd.c
> > > +++ b/virt/kvm/eventfd.c
> > > @@ -44,14 +44,18 @@
> > >   */
> > >  
> > >  struct _irqfd {
> > > -	struct kvm               *kvm;
> > > -	struct eventfd_ctx       *eventfd;
> > > -	int                       gsi;
> > > -	struct list_head          list;
> > > -	poll_table                pt;
> > > -	wait_queue_t              wait;
> > > -	struct work_struct        inject;
> > > -	struct work_struct        shutdown;
> > > +	/* Used for MSI fast-path */
> > > +	struct kvm *kvm;
> > > +	wait_queue_t wait;
> > > +	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
> > > +	/* Used for level IRQ fast-path */
> > > +	int gsi;
> > > +	struct work_struct inject;
> > > +	/* Used for setup/shutdown */
> > > +	struct eventfd_ctx *eventfd;
> > > +	struct list_head list;
> > > +	poll_table pt;
> > > +	struct work_struct shutdown;
> > >  };
> > >  
> > >  static struct workqueue_struct *irqfd_cleanup_wq;
> > > @@ -125,10 +129,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
> > >  {
> > >  	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
> > >  	unsigned long flags = (unsigned long)key;
> > > +	struct kvm_kernel_irq_routing_entry *irq;
> > >  
> > > -	if (flags & POLLIN)
> > > +	if (flags & POLLIN) {
> > > +		rcu_read_lock();
> > > +		irq = irqfd->irq_entry;
> > Why not rcu_dereference()? And why it can't be zero here?
> > 
> > >  		/* An event has been signaled, inject an interrupt */
> > > -		schedule_work(&irqfd->inject);
> > > +		if (irq)
> > > +			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> > > +		else
> > > +			schedule_work(&irqfd->inject);
> > > +		rcu_read_unlock();
> > > +	}
> > >  
> > >  	if (flags & POLLHUP) {
> > >  		/* The eventfd is closing, detach from KVM */
> > > @@ -166,6 +178,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
> > >  static int
> > >  kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> > >  {
> > > +	struct kvm_irq_routing_table *irq_rt;
> > >  	struct _irqfd *irqfd, *tmp;
> > >  	struct file *file = NULL;
> > >  	struct eventfd_ctx *eventfd = NULL;
> > > @@ -215,6 +228,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> > >  		goto fail;
> > >  	}
> > >  
> > > +	rcu_read_lock();
> > > +	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
> > > +	rcu_read_unlock();
> > > +
> > >  	events = file->f_op->poll(file, &irqfd->pt);
> > >  
> > >  	list_add_tail(&irqfd->list, &kvm->irqfds.items);
> > > @@ -271,8 +288,15 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> > >  	spin_lock_irq(&kvm->irqfds.lock);
> > >  
> > >  	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
> > > -		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
> > > +		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
> > > +			/* This rcu_assign_pointer is needed for when
> > > +			 * another thread calls kvm_irqfd_update before
> > > +			 * we flush workqueue below.
> > > +			 * It is paired with synchronize_rcu done by caller
> > > +			 * of that function. */
> > > +			rcu_assign_pointer(irqfd->irq_entry, NULL);
> > >  			irqfd_deactivate(irqfd);
> > > +		}
> > >  	}
> > >  
> > >  	spin_unlock_irq(&kvm->irqfds.lock);
> > > @@ -321,6 +345,41 @@ kvm_irqfd_release(struct kvm *kvm)
> > >  
> > >  }
> > >  
> > > +/* Must be called under irqfds.lock */
> > > +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
> > > +			 struct kvm_irq_routing_table *irq_rt)
> > > +{
> > > +	struct kvm_kernel_irq_routing_entry *e;
> > > +	struct hlist_node *n;
> > > +
> > > +	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
> > > +		rcu_assign_pointer(irqfd->irq_entry, NULL);
> > > +		return;
> > > +	}
> > > +
> > > +	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
> > > +		/* Only fast-path MSI. */
> > > +		if (e->type == KVM_IRQ_ROUTING_MSI)
> > > +			rcu_assign_pointer(irqfd->irq_entry, e);
> > > +		else
> > > +			rcu_assign_pointer(irqfd->irq_entry, NULL);
> > > +	}
> > Shouldn't we flush workqueue if routing entry is gone?
> > 
> > > +}
> > > +
> > > +/* Update irqfd after a routing change.  Caller must invoke synchronize_rcu
> > > + * afterwards. */
> > > +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt)
> > > +{
> > > +	struct _irqfd *irqfd;
> > > +
> > > +	spin_lock_irq(&kvm->irqfds.lock);
> > > +
> > > +	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
> > > +		irqfd_update(kvm, irqfd, irq_rt);
> > > +
> > > +	spin_unlock_irq(&kvm->irqfds.lock);
> > > +}
> > > +
> > >  /*
> > >   * create a host-wide workqueue for issuing deferred shutdown requests
> > >   * aggregated from all vm* instances. We need our own isolated single-thread
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index 8edca91..265ab72 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> > >  	return r;
> > >  }
> > >  
> > > -static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> > > -		       struct kvm *kvm, int irq_source_id, int level)
> > > +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> > > +		struct kvm *kvm, int irq_source_id, int level)
> > >  {
> > >  	struct kvm_lapic_irq irq;
> > >  
> > > @@ -410,7 +410,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > >  	mutex_lock(&kvm->irq_lock);
> > >  	old = kvm->irq_routing;
> > >  	rcu_assign_pointer(kvm->irq_routing, new);
> > > +	kvm_irqfd_update(kvm, new);
> > >  	mutex_unlock(&kvm->irq_lock);
> > > +
> > >  	synchronize_rcu();
> > >  
> > >  	new = old;
> > > -- 
> > > 1.7.3.2.91.g446ac
> > 
> > --
> > 			Gleb.

--
			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18  9:05 ` Gleb Natapov
  2010-11-18  9:16   ` Michael S. Tsirkin
@ 2010-11-18  9:34   ` Michael S. Tsirkin
  2010-11-18 10:04     ` Gleb Natapov
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18  9:34 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 11:05:22AM +0200, Gleb Natapov wrote:
> On Thu, Nov 18, 2010 at 12:12:54AM +0200, Michael S. Tsirkin wrote:
> > Store irq routing table pointer in the irqfd object,
> > and use that to inject MSI directly without bouncing out to
> > a kernel thread.
> > 
> > While we touch this structure, rearrange irqfd fields to make fastpath
> > better packed for better cache utilization.
> > 
> > Some notes on the design:
> > - Use pointer into the rt instead of copying an entry,
> >   to make it possible to use rcu, thus side-stepping
> >   locking complexities.  We also save some memory this way.
> What locking complexity is there with copying entry approach?
> 
> > - Old workqueue code is still used for level irqs.
> >   I don't think we DTRT with level anyway, however,
> >   it seems easier to keep the code around as
> >   it has been thought through and debugged, and fix level later than
> >   rip out and re-instate it later.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > 
> > The below is compile tested only.  Sending out for early
> > flames/feedback.  Please review!
> > 
> >  include/linux/kvm_host.h |    4 ++
> >  virt/kvm/eventfd.c       |   81 +++++++++++++++++++++++++++++++++++++++------
> >  virt/kvm/irq_comm.c      |    6 ++-
> >  3 files changed, 78 insertions(+), 13 deletions(-)
> > 
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index a055742..b6f7047 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -462,6 +462,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
> >  				   unsigned long *deliver_bitmask);
> >  #endif
> >  int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
> > +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> > +		int irq_source_id, int level);
> >  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
> >  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >  				   struct kvm_irq_ack_notifier *kian);
> > @@ -603,6 +605,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
> >  void kvm_eventfd_init(struct kvm *kvm);
> >  int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
> >  void kvm_irqfd_release(struct kvm *kvm);
> > +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt);
> >  int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
> >  
> >  #else
> > @@ -614,6 +617,7 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> >  }
> >  
> >  static inline void kvm_irqfd_release(struct kvm *kvm) {}
> > +static inline void kvm_irqfd_update(struct kvm *kvm) {}
> >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> >  {
> >  	return -ENOSYS;
> > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > index c1f1e3c..49c1864 100644
> > --- a/virt/kvm/eventfd.c
> > +++ b/virt/kvm/eventfd.c
> > @@ -44,14 +44,18 @@
> >   */
> >  
> >  struct _irqfd {
> > -	struct kvm               *kvm;
> > -	struct eventfd_ctx       *eventfd;
> > -	int                       gsi;
> > -	struct list_head          list;
> > -	poll_table                pt;
> > -	wait_queue_t              wait;
> > -	struct work_struct        inject;
> > -	struct work_struct        shutdown;
> > +	/* Used for MSI fast-path */
> > +	struct kvm *kvm;
> > +	wait_queue_t wait;
> > +	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
> > +	/* Used for level IRQ fast-path */
> > +	int gsi;
> > +	struct work_struct inject;
> > +	/* Used for setup/shutdown */
> > +	struct eventfd_ctx *eventfd;
> > +	struct list_head list;
> > +	poll_table pt;
> > +	struct work_struct shutdown;
> >  };
> >  
> >  static struct workqueue_struct *irqfd_cleanup_wq;
> > @@ -125,10 +129,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
> >  {
> >  	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
> >  	unsigned long flags = (unsigned long)key;
> > +	struct kvm_kernel_irq_routing_entry *irq;
> >  
> > -	if (flags & POLLIN)
> > +	if (flags & POLLIN) {
> > +		rcu_read_lock();
> > +		irq = irqfd->irq_entry;
> Why not rcu_dereference()?

Of course. Good catch, thanks.

> And why it can't be zero here?

It can, I check below.

> >  		/* An event has been signaled, inject an interrupt */
> > -		schedule_work(&irqfd->inject);
> > +		if (irq)
> > +			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> > +		else
> > +			schedule_work(&irqfd->inject);
> > +		rcu_read_unlock();
> > +	}
> >  
> >  	if (flags & POLLHUP) {
> >  		/* The eventfd is closing, detach from KVM */
> > @@ -166,6 +178,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
> >  static int
> >  kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> >  {
> > +	struct kvm_irq_routing_table *irq_rt;
> >  	struct _irqfd *irqfd, *tmp;
> >  	struct file *file = NULL;
> >  	struct eventfd_ctx *eventfd = NULL;
> > @@ -215,6 +228,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
> >  		goto fail;
> >  	}
> >  
> > +	rcu_read_lock();
> > +	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
> > +	rcu_read_unlock();
> > +
> >  	events = file->f_op->poll(file, &irqfd->pt);
> >  
> >  	list_add_tail(&irqfd->list, &kvm->irqfds.items);
> > @@ -271,8 +288,15 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
> >  	spin_lock_irq(&kvm->irqfds.lock);
> >  
> >  	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
> > -		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
> > +		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
> > +			/* This rcu_assign_pointer is needed for when
> > +			 * another thread calls kvm_irqfd_update before
> > +			 * we flush workqueue below.
> > +			 * It is paired with synchronize_rcu done by caller
> > +			 * of that function. */
> > +			rcu_assign_pointer(irqfd->irq_entry, NULL);
> >  			irqfd_deactivate(irqfd);
> > +		}
> >  	}
> >  
> >  	spin_unlock_irq(&kvm->irqfds.lock);
> > @@ -321,6 +345,41 @@ kvm_irqfd_release(struct kvm *kvm)
> >  
> >  }
> >  
> > +/* Must be called under irqfds.lock */
> > +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
> > +			 struct kvm_irq_routing_table *irq_rt)
> > +{
> > +	struct kvm_kernel_irq_routing_entry *e;
> > +	struct hlist_node *n;
> > +
> > +	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
> > +		rcu_assign_pointer(irqfd->irq_entry, NULL);
> > +		return;
> > +	}
> > +
> > +	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
> > +		/* Only fast-path MSI. */
> > +		if (e->type == KVM_IRQ_ROUTING_MSI)
> > +			rcu_assign_pointer(irqfd->irq_entry, e);
> > +		else
> > +			rcu_assign_pointer(irqfd->irq_entry, NULL);
> > +	}
> Shouldn't we flush workqueue if routing entry is gone?

You mean synchronize_rcu I think: we don't use
the entry from the workqueue, always from RCU read side critical section
(that's why it's tagged __rcu).  Caller of kvm_irqfd_update must do
synchronize_rcu and the comment below notes this.

> > +}
> > +
> > +/* Update irqfd after a routing change.  Caller must invoke synchronize_rcu
> > + * afterwards. */
> > +void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt)
> > +{
> > +	struct _irqfd *irqfd;
> > +
> > +	spin_lock_irq(&kvm->irqfds.lock);
> > +
> > +	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
> > +		irqfd_update(kvm, irqfd, irq_rt);
> > +
> > +	spin_unlock_irq(&kvm->irqfds.lock);
> > +}
> > +
> >  /*
> >   * create a host-wide workqueue for issuing deferred shutdown requests
> >   * aggregated from all vm* instances. We need our own isolated single-thread
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 8edca91..265ab72 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> >  	return r;
> >  }
> >  
> > -static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> > -		       struct kvm *kvm, int irq_source_id, int level)
> > +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> > +		struct kvm *kvm, int irq_source_id, int level)
> >  {
> >  	struct kvm_lapic_irq irq;
> >  
> > @@ -410,7 +410,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >  	mutex_lock(&kvm->irq_lock);
> >  	old = kvm->irq_routing;
> >  	rcu_assign_pointer(kvm->irq_routing, new);
> > +	kvm_irqfd_update(kvm, new);
> >  	mutex_unlock(&kvm->irq_lock);
> > +
> >  	synchronize_rcu();
> >  
> >  	new = old;
> > -- 
> > 1.7.3.2.91.g446ac
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-17 22:12 [PATCH RFC] kvm: fast-path msi injection with irqfd Michael S. Tsirkin
  2010-11-18  9:05 ` Gleb Natapov
@ 2010-11-18  9:55 ` Avi Kivity
  2010-11-18 10:57   ` Michael S. Tsirkin
  2 siblings, 0 replies; 22+ messages in thread
From: Avi Kivity @ 2010-11-18  9:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Marcelo Tosatti, Gleb Natapov, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On 11/18/2010 12:12 AM, Michael S. Tsirkin wrote:
> Store irq routing table pointer in the irqfd object,
> and use that to inject MSI directly without bouncing out to
> a kernel thread.
>
> While we touch this structure, rearrange irqfd fields to make fastpath
> better packed for better cache utilization.
>
> Some notes on the design:
> - Use pointer into the rt instead of copying an entry,
>    to make it possible to use rcu, thus side-stepping
>    locking complexities.  We also save some memory this way.
> - Old workqueue code is still used for level irqs.
>    I don't think we DTRT with level anyway, however,
>    it seems easier to keep the code around as
>    it has been thought through and debugged, and fix level later than
>    rip out and re-instate it later.
>
>

> @@ -166,6 +178,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
>   static int
>   kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
>   {
> +	struct kvm_irq_routing_table *irq_rt;
>   	struct _irqfd *irqfd, *tmp;
>   	struct file *file = NULL;
>   	struct eventfd_ctx *eventfd = NULL;
> @@ -215,6 +228,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
>   		goto fail;
>   	}
>
> +	rcu_read_lock();
> +	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
> +	rcu_read_unlock();

Wow, complicated.  rcu_read_lock() protects kvm->irq_routing, while 
we're in the update side of rcu-protected irqfd->irq_entry.

A comment please.

The rest looks good, it's nice we finally got the irq injection path so 
streamlined.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18  9:34   ` Michael S. Tsirkin
@ 2010-11-18 10:04     ` Gleb Natapov
  0 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2010-11-18 10:04 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 11:34:26AM +0200, Michael S. Tsirkin wrote:
> > > @@ -125,10 +129,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
> > >  {
> > >  	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
> > >  	unsigned long flags = (unsigned long)key;
> > > +	struct kvm_kernel_irq_routing_entry *irq;
> > >  
> > > -	if (flags & POLLIN)
> > > +	if (flags & POLLIN) {
> > > +		rcu_read_lock();
> > > +		irq = irqfd->irq_entry;
> > Why not rcu_dereference()?
> 
> Of course. Good catch, thanks.
> 
> > And why it can't be zero here?
> 
> It can, I check below.
> 
Yeah, missed that. Thanks.

> > >  		/* An event has been signaled, inject an interrupt */
> > > -		schedule_work(&irqfd->inject);
> > > +		if (irq)
> > > +			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> > > +		else
> > > +			schedule_work(&irqfd->inject);
> > > +		rcu_read_unlock();
> > > +	}
> > >  

--
			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-17 22:12 [PATCH RFC] kvm: fast-path msi injection with irqfd Michael S. Tsirkin
@ 2010-11-18 10:57   ` Michael S. Tsirkin
  2010-11-18  9:55 ` Avi Kivity
  2010-11-18 10:57   ` Michael S. Tsirkin
  2 siblings, 0 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18 10:57 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti, Gleb Natapov, Xiao Guangrong,
	Gregory Haskins, Chris Lalancette, kvm, linux-kernel


So the following on top will fix it all.
Any more comments before I bundle it up,
test and report?

kvm: fix up msi fastpath

This will be folded into the msi fastpath patch.
Changes:
- simplify irq_entry/irq_routing update rules:
  simply do it all under irqfds.lock
- document locking for rcu update side
- rcu_dereference for rcu pointer access

Still compile-tested only.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b6f7047..d13ced3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/msi.h>
+#include <linux/rcupdate.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -206,6 +207,8 @@ struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	/* Update side is protected by irq_lock and,
+	 * if configured, irqfds.lock. */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
@@ -605,7 +608,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
-void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt);
+void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
@@ -617,7 +620,12 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
-static inline void kvm_irqfd_update(struct kvm *kvm) {}
+static inline void kvm_irq_routing_update(struct kvm *kvm,
+					  struct kvm_irq_routing_table *irq_rt)
+{
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+}
+
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	return -ENOSYS;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 49c1864..b0cfae7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -47,6 +47,7 @@ struct _irqfd {
 	/* Used for MSI fast-path */
 	struct kvm *kvm;
 	wait_queue_t wait;
+	/* Update side is protected by irqfds.lock */
 	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
 	/* Used for level IRQ fast-path */
 	int gsi;
@@ -133,7 +134,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 
 	if (flags & POLLIN) {
 		rcu_read_lock();
-		irq = irqfd->irq_entry;
+		irq = rcu_dereference(irqfd->irq_entry);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
 			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
@@ -175,6 +176,27 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
+/* Must be called under irqfds.lock */
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
+			 struct kvm_irq_routing_table *irq_rt)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct hlist_node *n;
+
+	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
+		rcu_assign_pointer(irqfd->irq_entry, NULL);
+		return;
+	}
+
+	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
+		/* Only fast-path MSI. */
+		if (e->type == KVM_IRQ_ROUTING_MSI)
+			rcu_assign_pointer(irqfd->irq_entry, e);
+		else
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
+	}
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
@@ -228,9 +250,9 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 		goto fail;
 	}
 
-	rcu_read_lock();
-	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
-	rcu_read_unlock();
+	irq_rt = rcu_dereference_protected(kvm->irq_routing,
+					   lockdep_is_held(&kvm->irqfds.lock));
+	irqfd_update(kvm, irqfd, irq_rt);
 
 	events = file->f_op->poll(file, &irqfd->pt);
 
@@ -345,35 +367,17 @@ kvm_irqfd_release(struct kvm *kvm)
 
 }
 
-/* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
-			 struct kvm_irq_routing_table *irq_rt)
-{
-	struct kvm_kernel_irq_routing_entry *e;
-	struct hlist_node *n;
-
-	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
-		rcu_assign_pointer(irqfd->irq_entry, NULL);
-		return;
-	}
-
-	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
-		/* Only fast-path MSI. */
-		if (e->type == KVM_IRQ_ROUTING_MSI)
-			rcu_assign_pointer(irqfd->irq_entry, e);
-		else
-			rcu_assign_pointer(irqfd->irq_entry, NULL);
-	}
-}
-
-/* Update irqfd after a routing change.  Caller must invoke synchronize_rcu
+/* Change irq_routing and irqfd.  Caller must invoke synchronize_rcu
  * afterwards. */
-void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm,
+			    struct kvm_irq_routing_table *irq_rt)
 {
 	struct _irqfd *irqfd;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+
 	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
 		irqfd_update(kvm, irqfd, irq_rt);
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 265ab72..9f614b4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -409,8 +409,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	rcu_assign_pointer(kvm->irq_routing, new);
-	kvm_irqfd_update(kvm, new);
+	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
 
 	synchronize_rcu();

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
@ 2010-11-18 10:57   ` Michael S. Tsirkin
  0 siblings, 0 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18 10:57 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti, Gleb Natapov, Xiao Guangrong,
	Gregory Haskins


So the following on top will fix it all.
Any more comments before I bundle it up,
test and report?

kvm: fix up msi fastpath

This will be folded into the msi fastpath patch.
Changes:
- simplify irq_entry/irq_routing update rules:
  simply do it all under irqfds.lock
- document locking for rcu update side
- rcu_dereference for rcu pointer access

Still compile-tested only.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b6f7047..d13ced3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/msi.h>
+#include <linux/rcupdate.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -206,6 +207,8 @@ struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	/* Update side is protected by irq_lock and,
+	 * if configured, irqfds.lock. */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
@@ -605,7 +608,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
-void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt);
+void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
@@ -617,7 +620,12 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
-static inline void kvm_irqfd_update(struct kvm *kvm) {}
+static inline void kvm_irq_routing_update(struct kvm *kvm,
+					  struct kvm_irq_routing_table *irq_rt)
+{
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+}
+
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	return -ENOSYS;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 49c1864..b0cfae7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -47,6 +47,7 @@ struct _irqfd {
 	/* Used for MSI fast-path */
 	struct kvm *kvm;
 	wait_queue_t wait;
+	/* Update side is protected by irqfds.lock */
 	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
 	/* Used for level IRQ fast-path */
 	int gsi;
@@ -133,7 +134,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 
 	if (flags & POLLIN) {
 		rcu_read_lock();
-		irq = irqfd->irq_entry;
+		irq = rcu_dereference(irqfd->irq_entry);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
 			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
@@ -175,6 +176,27 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
+/* Must be called under irqfds.lock */
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
+			 struct kvm_irq_routing_table *irq_rt)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct hlist_node *n;
+
+	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
+		rcu_assign_pointer(irqfd->irq_entry, NULL);
+		return;
+	}
+
+	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
+		/* Only fast-path MSI. */
+		if (e->type == KVM_IRQ_ROUTING_MSI)
+			rcu_assign_pointer(irqfd->irq_entry, e);
+		else
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
+	}
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
@@ -228,9 +250,9 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 		goto fail;
 	}
 
-	rcu_read_lock();
-	irqfd_update(kvm, irqfd, rcu_dereference(kvm->irq_routing));
-	rcu_read_unlock();
+	irq_rt = rcu_dereference_protected(kvm->irq_routing,
+					   lockdep_is_held(&kvm->irqfds.lock));
+	irqfd_update(kvm, irqfd, irq_rt);
 
 	events = file->f_op->poll(file, &irqfd->pt);
 
@@ -345,35 +367,17 @@ kvm_irqfd_release(struct kvm *kvm)
 
 }
 
-/* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
-			 struct kvm_irq_routing_table *irq_rt)
-{
-	struct kvm_kernel_irq_routing_entry *e;
-	struct hlist_node *n;
-
-	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
-		rcu_assign_pointer(irqfd->irq_entry, NULL);
-		return;
-	}
-
-	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
-		/* Only fast-path MSI. */
-		if (e->type == KVM_IRQ_ROUTING_MSI)
-			rcu_assign_pointer(irqfd->irq_entry, e);
-		else
-			rcu_assign_pointer(irqfd->irq_entry, NULL);
-	}
-}
-
-/* Update irqfd after a routing change.  Caller must invoke synchronize_rcu
+/* Change irq_routing and irqfd.  Caller must invoke synchronize_rcu
  * afterwards. */
-void kvm_irqfd_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm,
+			    struct kvm_irq_routing_table *irq_rt)
 {
 	struct _irqfd *irqfd;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+
 	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
 		irqfd_update(kvm, irqfd, irq_rt);
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 265ab72..9f614b4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -409,8 +409,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	rcu_assign_pointer(kvm->irq_routing, new);
-	kvm_irqfd_update(kvm, new);
+	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
 
 	synchronize_rcu();

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 10:57   ` Michael S. Tsirkin
  (?)
@ 2010-11-18 11:03   ` Avi Kivity
  2010-11-18 11:10     ` Michael S. Tsirkin
  -1 siblings, 1 reply; 22+ messages in thread
From: Avi Kivity @ 2010-11-18 11:03 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Marcelo Tosatti, Gleb Natapov, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On 11/18/2010 12:57 PM, Michael S. Tsirkin wrote:
> So the following on top will fix it all.
> Any more comments before I bundle it up,
> test and report?
>

Nope (not that I can comment on an incremental).

I guess I should create an empty Documentation/kvm/locking.txt and force 
everyone else to update it.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 11:03   ` Avi Kivity
@ 2010-11-18 11:10     ` Michael S. Tsirkin
  2010-11-18 12:29       ` Avi Kivity
  0 siblings, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18 11:10 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Marcelo Tosatti, Gleb Natapov, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 01:03:44PM +0200, Avi Kivity wrote:
> On 11/18/2010 12:57 PM, Michael S. Tsirkin wrote:
> >So the following on top will fix it all.
> >Any more comments before I bundle it up,
> >test and report?
> >
> 
> Nope (not that I can comment on an incremental).

Here it is rolled up.

> I guess I should create an empty Documentation/kvm/locking.txt and
> force everyone else to update it.

Comments near the relevant fields not better?

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a055742..d13ced3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/msi.h>
+#include <linux/rcupdate.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -206,6 +207,8 @@ struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	/* Update side is protected by irq_lock and,
+	 * if configured, irqfds.lock. */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
@@ -462,6 +465,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
+		int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
@@ -603,6 +608,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
+void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
@@ -614,6 +620,12 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
+static inline void kvm_irq_routing_update(struct kvm *kvm,
+					  struct kvm_irq_routing_table *irq_rt)
+{
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+}
+
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	return -ENOSYS;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c..b0cfae7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -44,14 +44,19 @@
  */
 
 struct _irqfd {
-	struct kvm               *kvm;
-	struct eventfd_ctx       *eventfd;
-	int                       gsi;
-	struct list_head          list;
-	poll_table                pt;
-	wait_queue_t              wait;
-	struct work_struct        inject;
-	struct work_struct        shutdown;
+	/* Used for MSI fast-path */
+	struct kvm *kvm;
+	wait_queue_t wait;
+	/* Update side is protected by irqfds.lock */
+	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+	/* Used for level IRQ fast-path */
+	int gsi;
+	struct work_struct inject;
+	/* Used for setup/shutdown */
+	struct eventfd_ctx *eventfd;
+	struct list_head list;
+	poll_table pt;
+	struct work_struct shutdown;
 };
 
 static struct workqueue_struct *irqfd_cleanup_wq;
@@ -125,10 +130,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
 	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
 	unsigned long flags = (unsigned long)key;
+	struct kvm_kernel_irq_routing_entry *irq;
 
-	if (flags & POLLIN)
+	if (flags & POLLIN) {
+		rcu_read_lock();
+		irq = rcu_dereference(irqfd->irq_entry);
 		/* An event has been signaled, inject an interrupt */
-		schedule_work(&irqfd->inject);
+		if (irq)
+			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+		else
+			schedule_work(&irqfd->inject);
+		rcu_read_unlock();
+	}
 
 	if (flags & POLLHUP) {
 		/* The eventfd is closing, detach from KVM */
@@ -163,9 +176,31 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
+/* Must be called under irqfds.lock */
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
+			 struct kvm_irq_routing_table *irq_rt)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct hlist_node *n;
+
+	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
+		rcu_assign_pointer(irqfd->irq_entry, NULL);
+		return;
+	}
+
+	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
+		/* Only fast-path MSI. */
+		if (e->type == KVM_IRQ_ROUTING_MSI)
+			rcu_assign_pointer(irqfd->irq_entry, e);
+		else
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
+	}
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
 	struct eventfd_ctx *eventfd = NULL;
@@ -215,6 +250,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 		goto fail;
 	}
 
+	irq_rt = rcu_dereference_protected(kvm->irq_routing,
+					   lockdep_is_held(&kvm->irqfds.lock));
+	irqfd_update(kvm, irqfd, irq_rt);
+
 	events = file->f_op->poll(file, &irqfd->pt);
 
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
@@ -271,8 +310,15 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
 	spin_lock_irq(&kvm->irqfds.lock);
 
 	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
-		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
+		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
+			/* This rcu_assign_pointer is needed for when
+			 * another thread calls kvm_irqfd_update before
+			 * we flush workqueue below.
+			 * It is paired with synchronize_rcu done by caller
+			 * of that function. */
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
 			irqfd_deactivate(irqfd);
+		}
 	}
 
 	spin_unlock_irq(&kvm->irqfds.lock);
@@ -321,6 +367,23 @@ kvm_irqfd_release(struct kvm *kvm)
 
 }
 
+/* Change irq_routing and irqfd.  Caller must invoke synchronize_rcu
+ * afterwards. */
+void kvm_irq_routing_update(struct kvm *kvm,
+			    struct kvm_irq_routing_table *irq_rt)
+{
+	struct _irqfd *irqfd;
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+
+	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+		irqfd_update(kvm, irqfd, irq_rt);
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+}
+
 /*
  * create a host-wide workqueue for issuing deferred shutdown requests
  * aggregated from all vm* instances. We need our own isolated single-thread
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 8edca91..9f614b4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 }
 
-static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		       struct kvm *kvm, int irq_source_id, int level)
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id, int level)
 {
 	struct kvm_lapic_irq irq;
 
@@ -409,8 +409,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	rcu_assign_pointer(kvm->irq_routing, new);
+	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
+
 	synchronize_rcu();
 
 	new = old;

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 11:10     ` Michael S. Tsirkin
@ 2010-11-18 12:29       ` Avi Kivity
  2010-11-18 13:03         ` Michael S. Tsirkin
  0 siblings, 1 reply; 22+ messages in thread
From: Avi Kivity @ 2010-11-18 12:29 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Marcelo Tosatti, Gleb Natapov, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On 11/18/2010 01:10 PM, Michael S. Tsirkin wrote:
> >  I guess I should create an empty Documentation/kvm/locking.txt and
> >  force everyone else to update it.
>
> Comments near the relevant fields not better?
>

Not an either/or.  You can't understand the system from random source 
comments.

> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a055742..d13ced3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -16,6 +16,7 @@
>   #include<linux/mm.h>
>   #include<linux/preempt.h>
>   #include<linux/msi.h>
> +#include<linux/rcupdate.h>
>   #include<asm/signal.h>
>
>   #include<linux/kvm.h>
> @@ -206,6 +207,8 @@ struct kvm {
>
>   	struct mutex irq_lock;
>   #ifdef CONFIG_HAVE_KVM_IRQCHIP
> +	/* Update side is protected by irq_lock and,
> +	 * if configured, irqfds.lock. */

/*
  * kernel style comment
  * here and elsewhere
  */



>   	struct kvm_irq_routing_table __rcu *irq_routing;
>   	struct hlist_head mask_notifier_list;
>   	struct hlist_head irq_ack_notifier_list;
> @@ -462,6 +465,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
>   				   unsigned long *deliver_bitmask);
>   #endif
>   int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
> +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> +		int irq_source_id, int level);

No point in the level argument for an msi specific function.

>
>   #else
> @@ -614,6 +620,12 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
>   }
>
>   static inline void kvm_irqfd_release(struct kvm *kvm) {}

blank line

> +static inline void kvm_irq_routing_update(struct kvm *kvm,
> +					  struct kvm_irq_routing_table *irq_rt)
> +{
> +	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> +}
> +
>   static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>   {
>   	return -ENOSYS;

Apart from these minor issues, looks good.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 12:29       ` Avi Kivity
@ 2010-11-18 13:03         ` Michael S. Tsirkin
  2010-11-18 13:09           ` Avi Kivity
  2010-11-18 13:14           ` Gleb Natapov
  0 siblings, 2 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18 13:03 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Marcelo Tosatti, Gleb Natapov, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 02:29:11PM +0200, Avi Kivity wrote:
> On 11/18/2010 01:10 PM, Michael S. Tsirkin wrote:
> >>  I guess I should create an empty Documentation/kvm/locking.txt and
> >>  force everyone else to update it.
> >
> >Comments near the relevant fields not better?
> >
> 
> Not an either/or.  You can't understand the system from random
> source comments.
> 
> >diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >index a055742..d13ced3 100644
> >--- a/include/linux/kvm_host.h
> >+++ b/include/linux/kvm_host.h
> >@@ -16,6 +16,7 @@
> >  #include<linux/mm.h>
> >  #include<linux/preempt.h>
> >  #include<linux/msi.h>
> >+#include<linux/rcupdate.h>
> >  #include<asm/signal.h>
> >
> >  #include<linux/kvm.h>
> >@@ -206,6 +207,8 @@ struct kvm {
> >
> >  	struct mutex irq_lock;
> >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> >+	/* Update side is protected by irq_lock and,
> >+	 * if configured, irqfds.lock. */
> 
> /*
>  * kernel style comment
>  * here and elsewhere
>  */
> 
> 
> 
> >  	struct kvm_irq_routing_table __rcu *irq_routing;
> >  	struct hlist_head mask_notifier_list;
> >  	struct hlist_head irq_ack_notifier_list;
> >@@ -462,6 +465,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
> >  				   unsigned long *deliver_bitmask);
> >  #endif
> >  int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
> >+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> >+		int irq_source_id, int level);
> 
> No point in the level argument for an msi specific function.

This is an existing function I made non-static.
We have per-gsi callbacks so level is required there to match.
I could add a wrapper I guess:

int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
		int irq_source_id, int level)
{
	if (!level)
		return -1;
	return kvm_send_msi(irq_entry, kvm, irq_source_id);
}

This results in less code for irqfd but more code for ioctl injection
... is it worth it?

> >
> >  #else
> >@@ -614,6 +620,12 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
> >  }
> >
> >  static inline void kvm_irqfd_release(struct kvm *kvm) {}
> 
> blank line
> 

There's no line before kvm_eventfd_init either ...
I added one.

> >+static inline void kvm_irq_routing_update(struct kvm *kvm,
> >+					  struct kvm_irq_routing_table *irq_rt)
> >+{
> >+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> >+}
> >+
> >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> >  {
> >  	return -ENOSYS;
> 
> Apart from these minor issues, looks good.


Something we should consider improving is the loop over all VCPUs that
kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
interrupts) it should be possible to precompute and store the CPU
in question as part of the routing entry.

Something for a separate patch ... comments?

> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:03         ` Michael S. Tsirkin
@ 2010-11-18 13:09           ` Avi Kivity
  2010-11-18 13:14           ` Gleb Natapov
  1 sibling, 0 replies; 22+ messages in thread
From: Avi Kivity @ 2010-11-18 13:09 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Marcelo Tosatti, Gleb Natapov, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On 11/18/2010 03:03 PM, Michael S. Tsirkin wrote:
> >  >   int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
> >  >+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> >  >+		int irq_source_id, int level);
> >
> >  No point in the level argument for an msi specific function.
>
> This is an existing function I made non-static.
> We have per-gsi callbacks so level is required there to match.

Right.

> I could add a wrapper I guess:
>
> int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
> 		int irq_source_id, int level)
> {
> 	if (!level)
> 		return -1;
> 	return kvm_send_msi(irq_entry, kvm, irq_source_id);
> }
>
> This results in less code for irqfd but more code for ioctl injection
> ... is it worth it?

IMO not.

> >
> >  Apart from these minor issues, looks good.
>
>
> Something we should consider improving is the loop over all VCPUs that
> kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
> interrupts) it should be possible to precompute an store the CPU
> in question as part of the routing entry.

> Something for a separate patch ... comments?

Yes.  Either precompute, or compute on first use and cache.  Precompute 
is more realtime-friendly so I prefer it.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:03         ` Michael S. Tsirkin
  2010-11-18 13:09           ` Avi Kivity
@ 2010-11-18 13:14           ` Gleb Natapov
  2010-11-18 13:18             ` Avi Kivity
  2010-11-18 13:20             ` Michael S. Tsirkin
  1 sibling, 2 replies; 22+ messages in thread
From: Gleb Natapov @ 2010-11-18 13:14 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 03:03:37PM +0200, Michael S. Tsirkin wrote:
> > >+static inline void kvm_irq_routing_update(struct kvm *kvm,
> > >+					  struct kvm_irq_routing_table *irq_rt)
> > >+{
> > >+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> > >+}
> > >+
> > >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> > >  {
> > >  	return -ENOSYS;
> > 
> > Apart from these minor issues, looks good.
> 
> 
> Something we should consider improving is the loop over all VCPUs that
> kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
> interrupts) it should be possible to precompute an store the CPU
> in question as part of the routing entry.
> 
> Something for a separate patch ... comments?
> 
I do not think this info should be part of routing entry. Routing entry
is more about describing wires on the board. Other than that
this is a good idea that, IIRC, we already discussed once.

--
			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:14           ` Gleb Natapov
@ 2010-11-18 13:18             ` Avi Kivity
  2010-11-18 13:20             ` Michael S. Tsirkin
  1 sibling, 0 replies; 22+ messages in thread
From: Avi Kivity @ 2010-11-18 13:18 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Michael S. Tsirkin, Marcelo Tosatti, Xiao Guangrong,
	Gregory Haskins, Chris Lalancette, kvm, linux-kernel

On 11/18/2010 03:14 PM, Gleb Natapov wrote:
> On Thu, Nov 18, 2010 at 03:03:37PM +0200, Michael S. Tsirkin wrote:
> >  >  >+static inline void kvm_irq_routing_update(struct kvm *kvm,
> >  >  >+					  struct kvm_irq_routing_table *irq_rt)
> >  >  >+{
> >  >  >+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> >  >  >+}
> >  >  >+
> >  >  >   static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> >  >  >   {
> >  >  >   	return -ENOSYS;
> >  >
> >  >  Apart from these minor issues, looks good.
> >
> >
> >  Something we should consider improving is the loop over all VCPUs that
> >  kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
> >  interrupts) it should be possible to precompute an store the CPU
> >  in question as part of the routing entry.
> >
> >  Something for a separate patch ... comments?
> >
> I do not think this info should be part of routing entry. Routing entry
> is more about describing wires on the board. Other then that
> this is a good idea that, IIRC, we already discussed once.
>

Not as part of the routing entry exposed to userspace.  But as a private 
kernel field, why not?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:14           ` Gleb Natapov
  2010-11-18 13:18             ` Avi Kivity
@ 2010-11-18 13:20             ` Michael S. Tsirkin
  2010-11-18 13:35               ` Gleb Natapov
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18 13:20 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 03:14:53PM +0200, Gleb Natapov wrote:
> On Thu, Nov 18, 2010 at 03:03:37PM +0200, Michael S. Tsirkin wrote:
> > > >+static inline void kvm_irq_routing_update(struct kvm *kvm,
> > > >+					  struct kvm_irq_routing_table *irq_rt)
> > > >+{
> > > >+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> > > >+}
> > > >+
> > > >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> > > >  {
> > > >  	return -ENOSYS;
> > > 
> > > Apart from these minor issues, looks good.
> > 
> > 
> > Something we should consider improving is the loop over all VCPUs that
> > kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
> > interrupts) it should be possible to precompute an store the CPU
> > in question as part of the routing entry.
> > 
> > Something for a separate patch ... comments?
> > 
> I do not think this info should be part of routing entry. Routing entry
> is more about describing wires on the board.

Not for msi. kvm_kernel_irq_routing_entry seems to just keep an
address/data pair in that case. So

	union {
		struct {
			unsigned irqchip;
			unsigned pin;
		} irqchip;
		struct msi_msg msi;
	};

would become

	union {
		struct {
			unsigned irqchip;
			unsigned pin;
		} irqchip;
		struct {
			struct msi_msg msi;
			struct kvm_vcpu *dest;
		} msi;
	};

or something like this.

> Other then that
> this is a good idea that, IIRC, we already discussed once.
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:20             ` Michael S. Tsirkin
@ 2010-11-18 13:35               ` Gleb Natapov
  2010-11-18 13:39                 ` Avi Kivity
  2010-11-18 13:48                 ` Michael S. Tsirkin
  0 siblings, 2 replies; 22+ messages in thread
From: Gleb Natapov @ 2010-11-18 13:35 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 03:20:27PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 18, 2010 at 03:14:53PM +0200, Gleb Natapov wrote:
> > On Thu, Nov 18, 2010 at 03:03:37PM +0200, Michael S. Tsirkin wrote:
> > > > >+static inline void kvm_irq_routing_update(struct kvm *kvm,
> > > > >+					  struct kvm_irq_routing_table *irq_rt)
> > > > >+{
> > > > >+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> > > > >+}
> > > > >+
> > > > >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> > > > >  {
> > > > >  	return -ENOSYS;
> > > > 
> > > > Apart from these minor issues, looks good.
> > > 
> > > 
> > > Something we should consider improving is the loop over all VCPUs that
> > > kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
> > > interrupts) it should be possible to precompute an store the CPU
> > > in question as part of the routing entry.
> > > 
> > > Something for a separate patch ... comments?
> > > 
> > I do not think this info should be part of routing entry. Routing entry
> > is more about describing wires on the board.
> 
> Not for msi. kvm_kernel_irq_routing_entry seems to just keep an
> address/data pair in that case. So
> 
Yeah. Using routing_entry for MSI was a misdesign. We discussed that too :)

> 	union {
> 		struct {
> 			unsigned irqchip;
> 			unsigned pin;
> 		} irqchip;
> 		struct msi_msg msi;
> 	};
> 
> would become
> 
> 	union {
> 		struct {
> 			unsigned irqchip;
> 			unsigned pin;
> 		} irqchip;
> 		struct {
> 			struct msi_msg msi;
> 			struct kvm_vpcu *dest;
> 		} msi;
> 	};
> 
> or something like this.
Ah so you want to do it only for MSI? For MSI it makes sense. Remember
though that sometimes the destination depends on the message itself (specifically
on delivery mode).

> 
> > Other then that
> > this is a good idea that, IIRC, we already discussed once.
> > 
> > --
> > 			Gleb.

--
			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:35               ` Gleb Natapov
@ 2010-11-18 13:39                 ` Avi Kivity
  2010-11-18 13:49                   ` Michael S. Tsirkin
  2010-11-18 13:48                 ` Michael S. Tsirkin
  1 sibling, 1 reply; 22+ messages in thread
From: Avi Kivity @ 2010-11-18 13:39 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Michael S. Tsirkin, Marcelo Tosatti, Xiao Guangrong,
	Gregory Haskins, Chris Lalancette, kvm, linux-kernel

On 11/18/2010 03:35 PM, Gleb Natapov wrote:
> >
> >  or something like this.
> Ah so you want to do it only for MSI? For MSI it makes sense. Remember
> though that sometimes destination depend on message itself (specifically
> on delivery mode).

Yes, broadcast or multicast or lowest priority wouldn't get this treatment.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:35               ` Gleb Natapov
  2010-11-18 13:39                 ` Avi Kivity
@ 2010-11-18 13:48                 ` Michael S. Tsirkin
  2010-11-18 14:39                   ` Gleb Natapov
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18 13:48 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 03:35:01PM +0200, Gleb Natapov wrote:
> On Thu, Nov 18, 2010 at 03:20:27PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 18, 2010 at 03:14:53PM +0200, Gleb Natapov wrote:
> > > On Thu, Nov 18, 2010 at 03:03:37PM +0200, Michael S. Tsirkin wrote:
> > > > > >+static inline void kvm_irq_routing_update(struct kvm *kvm,
> > > > > >+					  struct kvm_irq_routing_table *irq_rt)
> > > > > >+{
> > > > > >+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> > > > > >+}
> > > > > >+
> > > > > >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> > > > > >  {
> > > > > >  	return -ENOSYS;
> > > > > 
> > > > > Apart from these minor issues, looks good.
> > > > 
> > > > 
> > > > Something we should consider improving is the loop over all VCPUs that
> > > > kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
> > > > interrupts) it should be possible to precompute an store the CPU
> > > > in question as part of the routing entry.
> > > > 
> > > > Something for a separate patch ... comments?
> > > > 
> > > I do not think this info should be part of routing entry. Routing entry
> > > is more about describing wires on the board.
> > 
> > Not for msi. kvm_kernel_irq_routing_entry seems to just keep an
> > address/data pair in that case. So
> > 
> Yeah. Using routing_entry for MSI was miss design. We discussed that too :)
> 
> > 	union {
> > 		struct {
> > 			unsigned irqchip;
> > 			unsigned pin;
> > 		} irqchip;
> > 		struct msi_msg msi;
> > 	};
> > 
> > would become
> > 
> > 	union {
> > 		struct {
> > 			unsigned irqchip;
> > 			unsigned pin;
> > 		} irqchip;
> > 		struct {
> > 			struct msi_msg msi;
> > 			struct kvm_vpcu *dest;
> > 		} msi;
> > 	};
> > 
> > or something like this.
> Ah so you want to do it only for MSI? For MSI it makes sense. Remember
> though that sometimes destination depend on message itself (specifically
> on delivery mode).

Of course. We'll take message/data and precompute destination.
Set to NULL for e.g. broadcast and recompute at injection time
in that case.  BTW SELF doesn't work for MSI at the moment, not sure
whether it's relevant or when it is used.

> > 
> > > Other then that
> > > this is a good idea that, IIRC, we already discussed once.
> > > 
> > > --
> > > 			Gleb.
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:39                 ` Avi Kivity
@ 2010-11-18 13:49                   ` Michael S. Tsirkin
  0 siblings, 0 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2010-11-18 13:49 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Gleb Natapov, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 03:39:15PM +0200, Avi Kivity wrote:
> On 11/18/2010 03:35 PM, Gleb Natapov wrote:
> >>
> >>  or something like this.
> >Ah so you want to do it only for MSI? For MSI it makes sense. Remember
> >though that sometimes destination depend on message itself (specifically
> >on delivery mode).
> 
> Yes, broadcast or multicast or lowest priority wouldn't get this treatment.

Unless there's a single online VCPU :)

> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH RFC] kvm: fast-path msi injection with irqfd
  2010-11-18 13:48                 ` Michael S. Tsirkin
@ 2010-11-18 14:39                   ` Gleb Natapov
  0 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2010-11-18 14:39 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Marcelo Tosatti, Xiao Guangrong, Gregory Haskins,
	Chris Lalancette, kvm, linux-kernel

On Thu, Nov 18, 2010 at 03:48:43PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 18, 2010 at 03:35:01PM +0200, Gleb Natapov wrote:
> > On Thu, Nov 18, 2010 at 03:20:27PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 18, 2010 at 03:14:53PM +0200, Gleb Natapov wrote:
> > > > On Thu, Nov 18, 2010 at 03:03:37PM +0200, Michael S. Tsirkin wrote:
> > > > > > >+static inline void kvm_irq_routing_update(struct kvm *kvm,
> > > > > > >+					  struct kvm_irq_routing_table *irq_rt)
> > > > > > >+{
> > > > > > >+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
> > > > > > >+}
> > > > > > >+
> > > > > > >  static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> > > > > > >  {
> > > > > > >  	return -ENOSYS;
> > > > > > 
> > > > > > Apart from these minor issues, looks good.
> > > > > 
> > > > > 
> > > > > Something we should consider improving is the loop over all VCPUs that
> > > > > kvm_irq_delivery_to_apic invokes.  I think that (for non-broadcast
> > > > > interrupts) it should be possible to precompute an store the CPU
> > > > > in question as part of the routing entry.
> > > > > 
> > > > > Something for a separate patch ... comments?
> > > > > 
> > > > I do not think this info should be part of routing entry. Routing entry
> > > > is more about describing wires on the board.
> > > 
> > > Not for msi. kvm_kernel_irq_routing_entry seems to just keep an
> > > address/data pair in that case. So
> > > 
> > Yeah. Using routing_entry for MSI was miss design. We discussed that too :)
> > 
> > > 	union {
> > > 		struct {
> > > 			unsigned irqchip;
> > > 			unsigned pin;
> > > 		} irqchip;
> > > 		struct msi_msg msi;
> > > 	};
> > > 
> > > would become
> > > 
> > > 	union {
> > > 		struct {
> > > 			unsigned irqchip;
> > > 			unsigned pin;
> > > 		} irqchip;
> > > 		struct {
> > > 			struct msi_msg msi;
> > > 			struct kvm_vpcu *dest;
> > > 		} msi;
> > > 	};
> > > 
> > > or something like this.
> > Ah so you want to do it only for MSI? For MSI it makes sense. Remember
> > though that sometimes destination depend on message itself (specifically
> > on delivery mode).
> 
> Of course. We'll take message/data and precompute destination.
> Set to NULL for e.g. broadcast and recompute at injection time
> in that case.  BTW SELF doesn't work for MSI at the moment, not sure
> whether it's relevant or when is it used.
> 
Yes, only lowest prio is defined for MSI. Self or all but self has
no meaning for MSI.

--
			Gleb.

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2010-11-18 14:39 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-17 22:12 [PATCH RFC] kvm: fast-path msi injection with irqfd Michael S. Tsirkin
2010-11-18  9:05 ` Gleb Natapov
2010-11-18  9:16   ` Michael S. Tsirkin
2010-11-18  9:20     ` Gleb Natapov
2010-11-18  9:34   ` Michael S. Tsirkin
2010-11-18 10:04     ` Gleb Natapov
2010-11-18  9:55 ` Avi Kivity
2010-11-18 10:57 ` Michael S. Tsirkin
2010-11-18 10:57   ` Michael S. Tsirkin
2010-11-18 11:03   ` Avi Kivity
2010-11-18 11:10     ` Michael S. Tsirkin
2010-11-18 12:29       ` Avi Kivity
2010-11-18 13:03         ` Michael S. Tsirkin
2010-11-18 13:09           ` Avi Kivity
2010-11-18 13:14           ` Gleb Natapov
2010-11-18 13:18             ` Avi Kivity
2010-11-18 13:20             ` Michael S. Tsirkin
2010-11-18 13:35               ` Gleb Natapov
2010-11-18 13:39                 ` Avi Kivity
2010-11-18 13:49                   ` Michael S. Tsirkin
2010-11-18 13:48                 ` Michael S. Tsirkin
2010-11-18 14:39                   ` Gleb Natapov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.