All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kvm: Use a bitmap for tracking used GSIs
@ 2009-05-07 22:22 Alex Williamson
  2009-05-08 22:31 ` [PATCH v2] " Alex Williamson
  2009-05-11 12:00 ` [PATCH] " Yang, Sheng
  0 siblings, 2 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-07 22:22 UTC (permalink / raw)
  To: kvm, sheng; +Cc: alex.williamson

We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
---

 Applies on top of "kvm: device-assignment: Catch GSI overflow"

 hw/device-assignment.c  |    4 ++-
 kvm/libkvm/kvm-common.h |    3 +-
 kvm/libkvm/libkvm.c     |   68 +++++++++++++++++++++++++++++++++++++++++------
 kvm/libkvm/libkvm.h     |   10 +++++++
 4 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index e06dd08..5bdae24 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
     int i;
 
-    for (i = 0; i < dev->irq_entries_nr; i++)
+    for (i = 0; i < dev->irq_entries_nr; i++) {
         kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+    }
     free(dev->entry);
     dev->entry = NULL;
     dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..94f86e5 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -66,8 +66,9 @@ struct kvm_context {
 #ifdef KVM_CAP_IRQ_ROUTING
 	struct kvm_irq_routing *irq_routes;
 	int nr_allocated_irq_routes;
+	void *used_gsi_bitmap;
+	int max_gsi;
 #endif
-	int max_used_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index 2a4165a..43abc7d 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -1298,8 +1298,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 	new->flags = entry->flags;
 	new->u = entry->u;
 
-	if (entry->gsi > kvm->max_used_gsi)
-		kvm->max_used_gsi = entry->gsi;
 	return 0;
 #else
 	return -ENOSYS;
@@ -1404,20 +1402,72 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+#ifdef KVM_CAP_IRQ_ROUTING
+static inline void set_bit(unsigned int *buf, int bit)
+{
+	buf[bit >> 5] |= (1U << (bit & 0x1f));
+}
+
+static inline void clear_bit(unsigned int *buf, int bit)
+{
+	buf[bit >> 5] &= ~(1U << (bit & 0x1f));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+	int i, bit, gsi;
+	unsigned int *buf = kvm->used_gsi_bitmap;
+
+	for (i = 0; i < (kvm->max_gsi >> 5); i++) {
+		if (buf[i] != ~0U)
+			break;
+	}
+
+	if (i == kvm->max_gsi >> 5)
+		return -ENOSPC;
+
+	bit = ffs(~buf[i]);
+	if (!bit)
+		return -EAGAIN;
+
+	gsi = (bit - 1) | (i << 5);
+	set_bit(buf, gsi);
+	return gsi;
+}
+#endif
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
-	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-	    if (kvm->max_used_gsi + 1 < kvm_get_gsi_count(kvm))
-                return kvm->max_used_gsi + 1;
-            else
-                return -ENOSPC;
-        } else
-            return KVM_IOAPIC_NUM_PINS;
+	if (!kvm->max_gsi) {
+		int i;
+
+		/* Round the number of GSIs supported to a 4 byte
+		 * value so we can search it using ints and ffs */
+		i = kvm_get_gsi_count(kvm) & ~0x1f;
+		kvm->used_gsi_bitmap = malloc(i >> 3);
+		if (!kvm->used_gsi_bitmap)
+			return -ENOMEM;
+		memset(kvm->used_gsi_bitmap, 0, i >> 3);
+		kvm->max_gsi = i;
+
+		/* Mark all the IOAPIC pin GSIs as already used */
+		for (i = 0; i <= KVM_IOAPIC_NUM_PINS; i++)
+			set_bit(kvm->used_gsi_bitmap, i);
+	}
+
+	return kvm_find_free_gsi(kvm);
 #else
 	return -ENOSYS;
 #endif
 }
+ 
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+	clear_bit(kvm->used_gsi_bitmap, gsi);
+#endif
+}
 
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index c23d37b..4e9344c 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Free used GSI number
+ *
+ * Free used GSI number acquired from kvm_get_irq_route_gsi()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI number to free
+ */
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
 			   struct kvm_assigned_msix_nr *msix_nr);


^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCH v2] kvm: Use a bitmap for tracking used GSIs
  2009-05-07 22:22 [PATCH] kvm: Use a bitmap for tracking used GSIs Alex Williamson
@ 2009-05-08 22:31 ` Alex Williamson
  2009-05-12 19:39   ` Michael S. Tsirkin
  2009-05-12 22:07   ` [PATCH v3] " Alex Williamson
  2009-05-11 12:00 ` [PATCH] " Yang, Sheng
  1 sibling, 2 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-08 22:31 UTC (permalink / raw)
  To: kvm, sheng; +Cc: alex.williamson

We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
---

 v2: Add a mutex around the bitmap to protect from races

 hw/device-assignment.c  |    4 ++
 kvm/libkvm/kvm-common.h |    4 ++
 kvm/libkvm/libkvm.c     |   82 ++++++++++++++++++++++++++++++++++++++++++-----
 kvm/libkvm/libkvm.h     |   10 ++++++
 4 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index e06dd08..5bdae24 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
     int i;
 
-    for (i = 0; i < dev->irq_entries_nr; i++)
+    for (i = 0; i < dev->irq_entries_nr; i++) {
         kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+    }
     free(dev->entry);
     dev->entry = NULL;
     dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..4b3cb51 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -66,8 +66,10 @@ struct kvm_context {
 #ifdef KVM_CAP_IRQ_ROUTING
 	struct kvm_irq_routing *irq_routes;
 	int nr_allocated_irq_routes;
+	void *used_gsi_bitmap;
+	int max_gsi;
+	pthread_mutex_t gsi_mutex;
 #endif
-	int max_used_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index 2a4165a..13a2c21 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -35,6 +35,7 @@
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <inttypes.h>
+#include <pthread.h>
 #include "libkvm.h"
 
 #if defined(__x86_64__) || defined(__i386__)
@@ -322,6 +323,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	kvm->dirty_pages_log_all = 0;
 	kvm->no_irqchip_creation = 0;
 	kvm->no_pit_creation = 0;
+#ifdef KVM_CAP_IRQ_ROUTING
+	pthread_mutex_init(&kvm->gsi_mutex, NULL);
+#endif
 
 	return kvm;
  out_close:
@@ -1298,8 +1302,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 	new->flags = entry->flags;
 	new->u = entry->u;
 
-	if (entry->gsi > kvm->max_used_gsi)
-		kvm->max_used_gsi = entry->gsi;
 	return 0;
 #else
 	return -ENOSYS;
@@ -1404,20 +1406,82 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+#ifdef KVM_CAP_IRQ_ROUTING
+static inline void set_bit(unsigned int *buf, int bit)
+{
+	buf[bit >> 5] |= (1U << (bit & 0x1f));
+}
+
+static inline void clear_bit(unsigned int *buf, int bit)
+{
+	buf[bit >> 5] &= ~(1U << (bit & 0x1f));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+	int i, bit, gsi;
+	unsigned int *buf = kvm->used_gsi_bitmap;
+
+	for (i = 0; i < (kvm->max_gsi >> 5); i++) {
+		if (buf[i] != ~0U)
+			break;
+	}
+
+	if (i == kvm->max_gsi >> 5)
+		return -ENOSPC;
+
+	bit = ffs(~buf[i]);
+	if (!bit)
+		return -EAGAIN;
+
+	gsi = (bit - 1) | (i << 5);
+	set_bit(buf, gsi);
+	return gsi;
+}
+#endif
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
-	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-	    if (kvm->max_used_gsi + 1 < kvm_get_gsi_count(kvm))
-                return kvm->max_used_gsi + 1;
-            else
-                return -ENOSPC;
-        } else
-            return KVM_IOAPIC_NUM_PINS;
+	int gsi;
+
+	pthread_mutex_lock(&kvm->gsi_mutex);
+
+	if (!kvm->max_gsi) {
+		int i;
+
+		/* Round the number of GSIs supported to a 4 byte
+		 * value so we can search it using ints and ffs */
+		i = kvm_get_gsi_count(kvm) & ~0x1f;
+		kvm->used_gsi_bitmap = malloc(i >> 3);
+		if (!kvm->used_gsi_bitmap) {
+			pthread_mutex_unlock(&kvm->gsi_mutex);
+			return -ENOMEM;
+		}
+		memset(kvm->used_gsi_bitmap, 0, i >> 3);
+		kvm->max_gsi = i;
+
+		/* Mark all the IOAPIC pin GSIs as already used */
+		for (i = 0; i <= KVM_IOAPIC_NUM_PINS; i++)
+			set_bit(kvm->used_gsi_bitmap, i);
+	}
+
+	gsi = kvm_find_free_gsi(kvm);
+	pthread_mutex_unlock(&kvm->gsi_mutex);
+	return gsi;
 #else
 	return -ENOSYS;
 #endif
 }
+ 
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+	pthread_mutex_lock(&kvm->gsi_mutex);
+	clear_bit(kvm->used_gsi_bitmap, gsi);
+	pthread_mutex_unlock(&kvm->gsi_mutex);
+#endif
+}
 
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index c23d37b..4e9344c 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Free used GSI number
+ *
+ * Free used GSI number acquired from kvm_get_irq_route_gsi()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI number to free
+ */
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
 			   struct kvm_assigned_msix_nr *msix_nr);


^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCH] kvm: Use a bitmap for tracking used GSIs
  2009-05-07 22:22 [PATCH] kvm: Use a bitmap for tracking used GSIs Alex Williamson
  2009-05-08 22:31 ` [PATCH v2] " Alex Williamson
@ 2009-05-11 12:00 ` Yang, Sheng
  2009-05-12 18:45   ` Alex Williamson
  1 sibling, 1 reply; 56+ messages in thread
From: Yang, Sheng @ 2009-05-11 12:00 UTC (permalink / raw)
  To: kvm; +Cc: Alex Williamson

On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
>
> Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> ---
>
>  Applies on top of "kvm: device-assignment: Catch GSI overflow"
>
>  hw/device-assignment.c  |    4 ++-
>  kvm/libkvm/kvm-common.h |    3 +-
>  kvm/libkvm/libkvm.c     |   68
> +++++++++++++++++++++++++++++++++++++++++------ kvm/libkvm/libkvm.h     |  
> 10 +++++++
>  4 files changed, 74 insertions(+), 11 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index e06dd08..5bdae24 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
>  {
>      int i;
>
> -    for (i = 0; i < dev->irq_entries_nr; i++)
> +    for (i = 0; i < dev->irq_entries_nr; i++) {
>          kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> +        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> +    }
>      free(dev->entry);
>      dev->entry = NULL;
>      dev->irq_entries_nr = 0;
> diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> index 591fb53..94f86e5 100644
> --- a/kvm/libkvm/kvm-common.h
> +++ b/kvm/libkvm/kvm-common.h
> @@ -66,8 +66,9 @@ struct kvm_context {
>  #ifdef KVM_CAP_IRQ_ROUTING
>  	struct kvm_irq_routing *irq_routes;
>  	int nr_allocated_irq_routes;
> +	void *used_gsi_bitmap;
> +	int max_gsi;
>  #endif
> -	int max_used_gsi;
>  };
>
>  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> index 2a4165a..43abc7d 100644
> --- a/kvm/libkvm/libkvm.c
> +++ b/kvm/libkvm/libkvm.c
> @@ -1298,8 +1298,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
>  	new->flags = entry->flags;
>  	new->u = entry->u;
>
> -	if (entry->gsi > kvm->max_used_gsi)
> -		kvm->max_used_gsi = entry->gsi;
>  	return 0;
>  #else
>  	return -ENOSYS;
> @@ -1404,20 +1402,72 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
>  #endif
>  }
>
> +#ifdef KVM_CAP_IRQ_ROUTING
> +static inline void set_bit(unsigned int *buf, int bit)
> +{
> +	buf[bit >> 5] |= (1U << (bit & 0x1f));
> +}
> +
> +static inline void clear_bit(unsigned int *buf, int bit)
> +{
> +	buf[bit >> 5] &= ~(1U << (bit & 0x1f));
> +}
> +
> +static int kvm_find_free_gsi(kvm_context_t kvm)
> +{
> +	int i, bit, gsi;
> +	unsigned int *buf = kvm->used_gsi_bitmap;
> +
> +	for (i = 0; i < (kvm->max_gsi >> 5); i++) {
> +		if (buf[i] != ~0U)
> +			break;
> +	}
> +
> +	if (i == kvm->max_gsi >> 5)
> +		return -ENOSPC;
> +
> +	bit = ffs(~buf[i]);
> +	if (!bit)
> +		return -EAGAIN;
> +
> +	gsi = (bit - 1) | (i << 5);
> +	set_bit(buf, gsi);
> +	return gsi;
> +}
> +#endif
> +
>  int kvm_get_irq_route_gsi(kvm_context_t kvm)
>  {
>  #ifdef KVM_CAP_IRQ_ROUTING
> -	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
> -	    if (kvm->max_used_gsi + 1 < kvm_get_gsi_count(kvm))
> -                return kvm->max_used_gsi + 1;
> -            else
> -                return -ENOSPC;
> -        } else
> -            return KVM_IOAPIC_NUM_PINS;
> +	if (!kvm->max_gsi) {
> +		int i;
> +
> +		/* Round the number of GSIs supported to a 4 byte
> +		 * value so we can search it using ints and ffs */
> +		i = kvm_get_gsi_count(kvm) & ~0x1f;
> +		kvm->used_gsi_bitmap = malloc(i >> 3);

3 or 5?

I am a little confused by these magic numbers, including 0x1f...

I think there are something can indicate the length of unsigned long in 
QEmu(sorry, can't find it now...), so how about using ffsl() and get other 
constants based on it?

-- 
regards
Yang, Sheng

> +		if (!kvm->used_gsi_bitmap)
> +			return -ENOMEM;
> +		memset(kvm->used_gsi_bitmap, 0, i >> 3);
> +		kvm->max_gsi = i;
> +
> +		/* Mark all the IOAPIC pin GSIs as already used */
> +		for (i = 0; i <= KVM_IOAPIC_NUM_PINS; i++)
> +			set_bit(kvm->used_gsi_bitmap, i);
> +	}
> +
> +	return kvm_find_free_gsi(kvm);
>  #else
>  	return -ENOSYS;
>  #endif
>  }
> +
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
> +{
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	clear_bit(kvm->used_gsi_bitmap, gsi);
> +#endif
> +}
>
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
> diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
> index c23d37b..4e9344c 100644
> --- a/kvm/libkvm/libkvm.h
> +++ b/kvm/libkvm/libkvm.h
> @@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
>   */
>  int kvm_get_irq_route_gsi(kvm_context_t kvm);
>
> +/*!
> + * \brief Free used GSI number
> + *
> + * Free used GSI number acquired from kvm_get_irq_route_gsi()
> + *
> + * \param kvm Pointer to the current kvm_context
> + * \param gsi GSI number to free
> + */
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
> +
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
>  			   struct kvm_assigned_msix_nr *msix_nr);
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] kvm: Use a bitmap for tracking used GSIs
  2009-05-11 12:00 ` [PATCH] " Yang, Sheng
@ 2009-05-12 18:45   ` Alex Williamson
  2009-05-12 19:06     ` Alex Williamson
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-12 18:45 UTC (permalink / raw)
  To: Yang, Sheng; +Cc: kvm

On Mon, 2009-05-11 at 12:00 +0000, Yang, Sheng wrote:
> On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > +		/* Round the number of GSIs supported to a 4 byte
> > +		 * value so we can search it using ints and ffs */
> > +		i = kvm_get_gsi_count(kvm) & ~0x1f;
> > +		kvm->used_gsi_bitmap = malloc(i >> 3);
> 
> 3 or 5?

3, ie. /8 (bits to bytes)

> I am a little confused by these magic numbers, including 0x1f...

The 5 shift gives us the index into the array of ints, the 0x1f gives us
the bit index into a specific int.  This is very similar to the code in
hw/acpi.c.

> I think there are something can indicate the length of unsigned long in 
> QEmu(sorry, can't find it now...), so how about using ffsl() and get other 
> constants based on it?

We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
There's HOST_LONG_BITS, but that doesn't actually help defining a shift
value.  Thanks,

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: Re: [PATCH] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 18:45   ` Alex Williamson
@ 2009-05-12 19:06     ` Alex Williamson
  2009-05-12 19:10       ` Michael S. Tsirkin
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-12 19:06 UTC (permalink / raw)
  To: Yang, Sheng; +Cc: kvm

On Tue, 2009-05-12 at 12:45 -0600, Alex Williamson wrote:
> On Mon, 2009-05-11 at 12:00 +0000, Yang, Sheng wrote:
> > On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > > +		/* Round the number of GSIs supported to a 4 byte
> > > +		 * value so we can search it using ints and ffs */
> > > +		i = kvm_get_gsi_count(kvm) & ~0x1f;
> > > +		kvm->used_gsi_bitmap = malloc(i >> 3);
> > 
> > 3 or 5?
> 
> 3, ie. /8 (bits to bytes)
> 
> > I am a little confused by these magic numbers, including 0x1f...
> 
> The 5 shift gives us the index into the array of ints, the 0x1f gives us
> the bit index into a specific int.  This is very similar to the code in
> hw/acpi.c.
> 
> > I think there are something can indicate the length of unsigned long in 
> > QEmu(sorry, can't find it now...), so how about using ffsl() and get other 
> > constants based on it?
> 
> We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
> There's HOST_LONG_BITS, but that doesn't actually help defining a shift
> value.  Thanks,

Hmm, neither ffsl() or ffsll() are standard.  I'm inclined to stick with
32bits.  We'll likely only be using the first index on x86_64 anyway
(2nd on ia64).  Thanks,

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: Re: [PATCH] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 19:06     ` Alex Williamson
@ 2009-05-12 19:10       ` Michael S. Tsirkin
  2009-05-12 19:23         ` Alex Williamson
  0 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-12 19:10 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Yang, Sheng, kvm

On Tue, May 12, 2009 at 01:06:54PM -0600, Alex Williamson wrote:
> On Tue, 2009-05-12 at 12:45 -0600, Alex Williamson wrote:
> > On Mon, 2009-05-11 at 12:00 +0000, Yang, Sheng wrote:
> > > On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > > > +		/* Round the number of GSIs supported to a 4 byte
> > > > +		 * value so we can search it using ints and ffs */
> > > > +		i = kvm_get_gsi_count(kvm) & ~0x1f;
> > > > +		kvm->used_gsi_bitmap = malloc(i >> 3);
> > > 
> > > 3 or 5?
> > 
> > 3, ie. /8 (bits to bytes)
> > 
> > > I am a little confused by these magic numbers, including 0x1f...
> > 
> > The 5 shift gives us the index into the array of ints, the 0x1f gives us
> > the bit index into a specific int.  This is very similar to the code in
> > hw/acpi.c.
> > 
> > > I think there are something can indicate the length of unsigned long in 
> > > QEmu(sorry, can't find it now...), so how about using ffsl() and get other 
> > > constants based on it?
> > 
> > We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
> > There's HOST_LONG_BITS, but that doesn't actually help defining a shift
> > value.  Thanks,
> 
> Hmm, neither ffsl() or ffsll() are standard.  I'm inclined to stick with
> 32bits.  We'll likely only be using the first index on x86_64 anyway
> (2nd on ia64).  Thanks,
> 
> Alex

With MSI, we start with GSI 24, so we'll be using more than just the
first index.

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: Re: [PATCH] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 19:10       ` Michael S. Tsirkin
@ 2009-05-12 19:23         ` Alex Williamson
  0 siblings, 0 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-12 19:23 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Yang, Sheng, kvm

On Tue, 2009-05-12 at 22:10 +0300, Michael S. Tsirkin wrote:
> On Tue, May 12, 2009 at 01:06:54PM -0600, Alex Williamson wrote:
> > On Tue, 2009-05-12 at 12:45 -0600, Alex Williamson wrote:
> > > On Mon, 2009-05-11 at 12:00 +0000, Yang, Sheng wrote:
> > > > On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > > > > +		/* Round the number of GSIs supported to a 4 byte
> > > > > +		 * value so we can search it using ints and ffs */
> > > > > +		i = kvm_get_gsi_count(kvm) & ~0x1f;
> > > > > +		kvm->used_gsi_bitmap = malloc(i >> 3);
> > > > 
> > > > 3 or 5?
> > > 
> > > 3, ie. /8 (bits to bytes)
> > > 
> > > > I am a little confused by these magic numbers, including 0x1f...
> > > 
> > > The 5 shift gives us the index into the array of ints, the 0x1f gives us
> > > the bit index into a specific int.  This is very similar to the code in
> > > hw/acpi.c.
> > > 
> > > > I think there are something can indicate the length of unsigned long in 
> > > > QEmu(sorry, can't find it now...), so how about using ffsl() and get other 
> > > > constants based on it?
> > > 
> > > We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
> > > There's HOST_LONG_BITS, but that doesn't actually help defining a shift
> > > value.  Thanks,
> > 
> > Hmm, neither ffsl() or ffsll() are standard.  I'm inclined to stick with
> > 32bits.  We'll likely only be using the first index on x86_64 anyway
> > (2nd on ia64).  Thanks,
> > 
> > Alex
> 
> With MSI, we start with GSI 24, so we'll be using more than just the
> first index.

Right, that leaves 7 GSIs free in the first 32 bit index for device
assignment, plus 999 GSIs free in the entire table.  If we're worried
about the overhead of using a 4 or 8 byte index into the bitmap, maybe
we should be caching GSIs for specific devices.  This feels like a bit
of an overkill for now though.  Thanks,

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v2] kvm: Use a bitmap for tracking used GSIs
  2009-05-08 22:31 ` [PATCH v2] " Alex Williamson
@ 2009-05-12 19:39   ` Michael S. Tsirkin
  2009-05-12 21:56     ` Alex Williamson
  2009-05-12 22:07   ` [PATCH v3] " Alex Williamson
  1 sibling, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-12 19:39 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng


On Fri, May 08, 2009 at 04:31:21PM -0600, Alex Williamson wrote:
> +#ifdef KVM_CAP_IRQ_ROUTING

We don't need these anymore.

> +static inline void set_bit(unsigned int *buf, int bit)
> +{
> +	buf[bit >> 5] |= (1U << (bit & 0x1f));
> +}

external () not needed here. bit >> 5 might be clearer as bit / 32 IMO.

> +
> +static inline void clear_bit(unsigned int *buf, int bit)
> +{
> +	buf[bit >> 5] &= ~(1U << (bit & 0x1f));
> +}

Make bit unsigned. And then bit & 0x1f can be written as bit % 32.
IMO that's easier to parse.

> +
> +static int kvm_find_free_gsi(kvm_context_t kvm)
> +{
> +	int i, bit, gsi;
> +	unsigned int *buf = kvm->used_gsi_bitmap;
> +
> +	for (i = 0; i < (kvm->max_gsi >> 5); i++) {

may be clearer as kvm->max_gsi / 32

> +		if (buf[i] != ~0U)
> +			break;
> +	}

{} around single statement isn't needed

> +
> +	if (i == kvm->max_gsi >> 5)
> +		return -ENOSPC;

May be clearer as kvm->max_gsi / 32
By the way, this math means we can't use all gsi's
if the number is not a multiple of 32.
Round up instead? It's not hard: (kvm->max_gsi + 31) / 32

> +
> +	bit = ffs(~buf[i]);
> +	if (!bit)
> +		return -EAGAIN;

We know it won't be 0, right?
Instead of checking twice, move the ffs call within the loop above?
E.g. like this:
	for (i = 0; i < kvm->max_gsi / 32; ++i)
		if ((bit = ffs(~buf[i])) {
		    gsi = bit - 1 + i * 32;
		    set_bit(buf, gsi);
		    return gsi;
		}

	return -ENOSPC;

> +
> +	gsi = (bit - 1) | (i << 5);

clearer as bit - 1 + i * 32

> +	set_bit(buf, gsi);
> +	return gsi;
> +}
> +#endif
> +
>  int kvm_get_irq_route_gsi(kvm_context_t kvm)
>  {
>  #ifdef KVM_CAP_IRQ_ROUTING
> -	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
> -	    if (kvm->max_used_gsi + 1 < kvm_get_gsi_count(kvm))
> -                return kvm->max_used_gsi + 1;
> -            else
> -                return -ENOSPC;
> -        } else
> -            return KVM_IOAPIC_NUM_PINS;
> +	int gsi;
> +
> +	pthread_mutex_lock(&kvm->gsi_mutex);
> +
> +	if (!kvm->max_gsi) {

Why is this lazy allocation required?
Let's do the below in some init function,
and keep code simple?

> +		int i;
> +
> +		/* Round the number of GSIs supported to a 4 byte
> +		 * value so we can search it using ints and ffs */
> +		i = kvm_get_gsi_count(kvm) & ~0x1f;

Should not we round up? Why not?
Again, we can count = kvm_get_gsi_count(kvm) / 32, and use count * 4 below.

> +		kvm->used_gsi_bitmap = malloc(i >> 3);

why not qemu_malloc?

> +		if (!kvm->used_gsi_bitmap) {
> +			pthread_mutex_unlock(&kvm->gsi_mutex);
> +			return -ENOMEM;
> +		}
> +		memset(kvm->used_gsi_bitmap, 0, i >> 3);
> +		kvm->max_gsi = i;
> +
> +		/* Mark all the IOAPIC pin GSIs as already used */
> +		for (i = 0; i <= KVM_IOAPIC_NUM_PINS; i++)

Is this really <=? Not <?

> +			set_bit(kvm->used_gsi_bitmap, i);
> +	}
> +
> +	gsi = kvm_find_free_gsi(kvm);
> +	pthread_mutex_unlock(&kvm->gsi_mutex);
> +	return gsi;
>  #else
>  	return -ENOSYS;
>  #endif
>  }
> + 
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
> +{
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	pthread_mutex_lock(&kvm->gsi_mutex);
> +	clear_bit(kvm->used_gsi_bitmap, gsi);
> +	pthread_mutex_unlock(&kvm->gsi_mutex);
> +#endif
> +}
>  
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
> diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
> index c23d37b..4e9344c 100644
> --- a/kvm/libkvm/libkvm.h
> +++ b/kvm/libkvm/libkvm.h
> @@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
>   */
>  int kvm_get_irq_route_gsi(kvm_context_t kvm);
>  
> +/*!
> + * \brief Free used GSI number
> + *
> + * Free used GSI number acquired from kvm_get_irq_route_gsi()
> + *
> + * \param kvm Pointer to the current kvm_context
> + * \param gsi GSI number to free
> + */
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
> +
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
>  			   struct kvm_assigned_msix_nr *msix_nr);
> 

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v2] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 19:39   ` Michael S. Tsirkin
@ 2009-05-12 21:56     ` Alex Williamson
  0 siblings, 0 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-12 21:56 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, sheng

On Tue, 2009-05-12 at 22:39 +0300, Michael S. Tsirkin wrote:
> On Fri, May 08, 2009 at 04:31:21PM -0600, Alex Williamson wrote:
> > +#ifdef KVM_CAP_IRQ_ROUTING
> 
> We don't need these anymore.

Doesn't that depend on the kernel it's built against?  In any case, I
think it would deserve it's own patch to remove those throughout.
Removing them here would make their usage inconsistent.

> > +static inline void set_bit(unsigned int *buf, int bit)
> > +{
> > +	buf[bit >> 5] |= (1U << (bit & 0x1f));
> > +}
> 
> external () not needed here. bit >> 5 might be clearer as bit / 32 IMO.

Fixed

> > +
> > +static inline void clear_bit(unsigned int *buf, int bit)
> > +{
> > +	buf[bit >> 5] &= ~(1U << (bit & 0x1f));
> > +}
> 
> Make bit unsigned. And then bit & 0x1f can be written as bit % 32.
> IMO that's easier to parse.

Fixed

> > +
> > +static int kvm_find_free_gsi(kvm_context_t kvm)
> > +{
> > +	int i, bit, gsi;
> > +	unsigned int *buf = kvm->used_gsi_bitmap;
> > +
> > +	for (i = 0; i < (kvm->max_gsi >> 5); i++) {
> 
> may be clearer as kvm->max_gsi / 32

Fixed

> > +		if (buf[i] != ~0U)
> > +			break;
> > +	}
> 
> {} around single statement isn't needed

This is different in my new version, so the {} makes sense.

> > +
> > +	if (i == kvm->max_gsi >> 5)
> > +		return -ENOSPC;
> 
> May be clearer as kvm->max_gsi / 32
> By the way, this math means we can't use all gsi's
> if the number is not a multiple of 32.
> Round up instead? It's not hard: (kvm->max_gsi + 31) / 32

Fixed

> > +
> > +	bit = ffs(~buf[i]);
> > +	if (!bit)
> > +		return -EAGAIN;
> 
> We know it won't be 0, right?
> Instead of checking twice, move the ffs call within the loop above?
> E.g. like this:
> 	for (i = 0; i < kvm->max_gsi / 32; ++i)
> 		if ((bit = ffs(~buf[i])) {
> 		    gsi = bit - 1 + i * 32;
> 		    set_bit(buf, gsi);
> 		    return gsi;
> 		}
> 
> 	return -ENOSPC;

Nice, I updated to something similar.

> > +
> > +	gsi = (bit - 1) | (i << 5);
> 
> clearer as bit - 1 + i * 32

Yup, fixed.

> > +	set_bit(buf, gsi);
> > +	return gsi;
> > +}
> > +#endif
> > +
> >  int kvm_get_irq_route_gsi(kvm_context_t kvm)
> >  {
> >  #ifdef KVM_CAP_IRQ_ROUTING
> > -	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
> > -	    if (kvm->max_used_gsi + 1 < kvm_get_gsi_count(kvm))
> > -                return kvm->max_used_gsi + 1;
> > -            else
> > -                return -ENOSPC;
> > -        } else
> > -            return KVM_IOAPIC_NUM_PINS;
> > +	int gsi;
> > +
> > +	pthread_mutex_lock(&kvm->gsi_mutex);
> > +
> > +	if (!kvm->max_gsi) {
> 
> Why is this lazy allocation required?
> Let's do the below in some init function,
> and keep code simple?

Moved to kvm_init()

> > +		int i;
> > +
> > +		/* Round the number of GSIs supported to a 4 byte
> > +		 * value so we can search it using ints and ffs */
> > +		i = kvm_get_gsi_count(kvm) & ~0x1f;
> 
> Should not we round up? Why not?
> Again, we can count = kvm_get_gsi_count(kvm) / 32, and use count * 4 below.

Yep, rounded up, and pre-marked the excess bits as used.

> > +		kvm->used_gsi_bitmap = malloc(i >> 3);
> 
> why not qemu_malloc?

qemu_malloc isn't available in libkvm.c.  malloc is consistent with the
rest of the file.

> > +		if (!kvm->used_gsi_bitmap) {
> > +			pthread_mutex_unlock(&kvm->gsi_mutex);
> > +			return -ENOMEM;
> > +		}
> > +		memset(kvm->used_gsi_bitmap, 0, i >> 3);
> > +		kvm->max_gsi = i;
> > +
> > +		/* Mark all the IOAPIC pin GSIs as already used */
> > +		for (i = 0; i <= KVM_IOAPIC_NUM_PINS; i++)
> 
> Is this really <=? Not <?

Fixed.

Thanks, I'll send out a new version shortly.

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-08 22:31 ` [PATCH v2] " Alex Williamson
  2009-05-12 19:39   ` Michael S. Tsirkin
@ 2009-05-12 22:07   ` Alex Williamson
  2009-05-13  3:30     ` Yang, Sheng
                       ` (3 more replies)
  1 sibling, 4 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-12 22:07 UTC (permalink / raw)
  To: kvm, sheng.yang, mst; +Cc: alex.williamson

We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
---

 v2: Added mutex to protect gsi bitmap
 v3: Updated for comments from Michael Tsirkin
     No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"

 hw/device-assignment.c  |    4 ++
 kvm/libkvm/kvm-common.h |    4 ++
 kvm/libkvm/libkvm.c     |   83 +++++++++++++++++++++++++++++++++++++++++------
 kvm/libkvm/libkvm.h     |   10 ++++++
 4 files changed, 88 insertions(+), 13 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a7365c8..a6cc9b9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
     int i;
 
-    for (i = 0; i < dev->irq_entries_nr; i++)
+    for (i = 0; i < dev->irq_entries_nr; i++) {
         kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+    }
     free(dev->entry);
     dev->entry = NULL;
     dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..4b3cb51 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -66,8 +66,10 @@ struct kvm_context {
 #ifdef KVM_CAP_IRQ_ROUTING
 	struct kvm_irq_routing *irq_routes;
 	int nr_allocated_irq_routes;
+	void *used_gsi_bitmap;
+	int max_gsi;
+	pthread_mutex_t gsi_mutex;
 #endif
-	int max_used_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..3d7ab75 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -35,6 +35,7 @@
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <inttypes.h>
+#include <pthread.h>
 #include "libkvm.h"
 
 #if defined(__x86_64__) || defined(__i386__)
@@ -65,6 +66,8 @@
 int kvm_abi = EXPECTED_KVM_API_VERSION;
 int kvm_page_size;
 
+static inline void set_bit(uint32_t *buf, unsigned int bit);
+
 struct slot_info {
 	unsigned long phys_addr;
 	unsigned long len;
@@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	int fd;
 	kvm_context_t kvm;
 	int r;
+#ifdef KVM_CAP_IRQ_ROUTING
+	int gsi_count, gsi_bytes, i;
+#endif
 
 	fd = open("/dev/kvm", O_RDWR);
 	if (fd == -1) {
@@ -322,6 +328,27 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	kvm->dirty_pages_log_all = 0;
 	kvm->no_irqchip_creation = 0;
 	kvm->no_pit_creation = 0;
+#ifdef KVM_CAP_IRQ_ROUTING
+	pthread_mutex_init(&kvm->gsi_mutex, NULL);
+
+	gsi_count = kvm_get_gsi_count(kvm);
+	/* Round up so we can search ints using ffs */
+	gsi_bytes = (gsi_count + 31) / 32;
+	kvm->used_gsi_bitmap = malloc(gsi_bytes);
+	if (!kvm->used_gsi_bitmap) {
+		pthread_mutex_unlock(&kvm->gsi_mutex);
+		goto out_close;
+	}
+	memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
+	kvm->max_gsi = gsi_bytes * 8;
+
+	/* Mark all the IOAPIC pin GSIs and any over-allocated
+	 * GSIs as already in use. */
+	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+		set_bit(kvm->used_gsi_bitmap, i);
+	for (i = gsi_count; i < kvm->max_gsi; i++)
+		set_bit(kvm->used_gsi_bitmap, i);
+#endif
 
 	return kvm;
  out_close:
@@ -1298,8 +1325,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 	new->flags = entry->flags;
 	new->u = entry->u;
 
-	if (entry->gsi > kvm->max_used_gsi)
-		kvm->max_used_gsi = entry->gsi;
 	return 0;
 #else
 	return -ENOSYS;
@@ -1404,18 +1429,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+#ifdef KVM_CAP_IRQ_ROUTING
+static inline void set_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] |= 1U << (bit % 32);
+}
+
+static inline void clear_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] &= ~(1U << (bit % 32));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+	int i, bit, gsi;
+	uint32_t *buf = kvm->used_gsi_bitmap;
+
+	for (i = 0; i < kvm->max_gsi / 32; i++) {
+		bit = ffs(~buf[i]);
+		if (!bit)
+			continue;
+
+		gsi = bit - 1 + i * 32;
+		set_bit(buf, gsi);
+		return gsi;
+	}
+
+	return -ENOSPC;
+}
+#endif
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
+	int gsi = -ENOSYS;
+
 #ifdef KVM_CAP_IRQ_ROUTING
-	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-	    if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
-                return kvm->max_used_gsi + 1;
-            else
-                return -ENOSPC;
-        } else
-            return KVM_IOAPIC_NUM_PINS;
-#else
-	return -ENOSYS;
+	pthread_mutex_lock(&kvm->gsi_mutex);
+	gsi = kvm_find_free_gsi(kvm);
+	pthread_mutex_unlock(&kvm->gsi_mutex);
+#endif
+	return gsi;
+}
+ 
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+	pthread_mutex_lock(&kvm->gsi_mutex);
+	clear_bit(kvm->used_gsi_bitmap, gsi);
+	pthread_mutex_unlock(&kvm->gsi_mutex);
 #endif
 }
 
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 4821a1e..845bb8a 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Free used GSI number
+ *
+ * Free used GSI number acquired from kvm_get_irq_route_gsi()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI number to free
+ */
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
 			   struct kvm_assigned_msix_nr *msix_nr);


^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 22:07   ` [PATCH v3] " Alex Williamson
@ 2009-05-13  3:30     ` Yang, Sheng
  2009-05-13  3:42       ` Alex Williamson
  2009-05-13  4:41     ` [PATCH v4] " Alex Williamson
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 56+ messages in thread
From: Yang, Sheng @ 2009-05-13  3:30 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, mst

On Wednesday 13 May 2009 06:07:15 Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
>
> Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> ---
>
>  v2: Added mutex to protect gsi bitmap
>  v3: Updated for comments from Michael Tsirkin
>      No longer depends on "[PATCH] kvm: device-assignment: Catch GSI
> overflow"
>
>  hw/device-assignment.c  |    4 ++
>  kvm/libkvm/kvm-common.h |    4 ++
>  kvm/libkvm/libkvm.c     |   83
> +++++++++++++++++++++++++++++++++++++++++------ kvm/libkvm/libkvm.h     |  
> 10 ++++++
>  4 files changed, 88 insertions(+), 13 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index a7365c8..a6cc9b9 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
>  {
>      int i;
>
> -    for (i = 0; i < dev->irq_entries_nr; i++)
> +    for (i = 0; i < dev->irq_entries_nr; i++) {
>          kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> +        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> +    }
>      free(dev->entry);
>      dev->entry = NULL;
>      dev->irq_entries_nr = 0;
> diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> index 591fb53..4b3cb51 100644
> --- a/kvm/libkvm/kvm-common.h
> +++ b/kvm/libkvm/kvm-common.h
> @@ -66,8 +66,10 @@ struct kvm_context {
>  #ifdef KVM_CAP_IRQ_ROUTING
>  	struct kvm_irq_routing *irq_routes;
>  	int nr_allocated_irq_routes;
> +	void *used_gsi_bitmap;
> +	int max_gsi;
> +	pthread_mutex_t gsi_mutex;
>  #endif
> -	int max_used_gsi;
>  };
>
>  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> index ba0a5d1..3d7ab75 100644
> --- a/kvm/libkvm/libkvm.c
> +++ b/kvm/libkvm/libkvm.c
> @@ -35,6 +35,7 @@
>  #include <errno.h>
>  #include <sys/ioctl.h>
>  #include <inttypes.h>
> +#include <pthread.h>
>  #include "libkvm.h"
>
>  #if defined(__x86_64__) || defined(__i386__)
> @@ -65,6 +66,8 @@
>  int kvm_abi = EXPECTED_KVM_API_VERSION;
>  int kvm_page_size;
>
> +static inline void set_bit(uint32_t *buf, unsigned int bit);
> +
>  struct slot_info {
>  	unsigned long phys_addr;
>  	unsigned long len;
> @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
>  	int fd;
>  	kvm_context_t kvm;
>  	int r;
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	int gsi_count, gsi_bytes, i;
> +#endif
>
>  	fd = open("/dev/kvm", O_RDWR);
>  	if (fd == -1) {
> @@ -322,6 +328,27 @@ kvm_context_t kvm_init(struct kvm_callbacks
> *callbacks, kvm->dirty_pages_log_all = 0;
>  	kvm->no_irqchip_creation = 0;
>  	kvm->no_pit_creation = 0;
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	pthread_mutex_init(&kvm->gsi_mutex, NULL);
> +
> +	gsi_count = kvm_get_gsi_count(kvm);
> +	/* Round up so we can search ints using ffs */
> +	gsi_bytes = (gsi_count + 31) / 32;

CMIW, should it be gsi_bytes = (gsi_count + 7) / 8? This looks like bits-to-
int. 

> +	kvm->used_gsi_bitmap = malloc(gsi_bytes);
> +	if (!kvm->used_gsi_bitmap) {
> +		pthread_mutex_unlock(&kvm->gsi_mutex);
> +		goto out_close;
> +	}
> +	memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> +	kvm->max_gsi = gsi_bytes * 8;

So max_gsi = gsi_count / 4?

-- 
regards
Yang, Sheng

> +
> +	/* Mark all the IOAPIC pin GSIs and any over-allocated
> +	 * GSIs as already in use. */
> +	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
> +		set_bit(kvm->used_gsi_bitmap, i);
> +	for (i = gsi_count; i < kvm->max_gsi; i++)
> +		set_bit(kvm->used_gsi_bitmap, i);
> +#endif
>
>  	return kvm;
>   out_close:
> @@ -1298,8 +1325,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
>  	new->flags = entry->flags;
>  	new->u = entry->u;
>
> -	if (entry->gsi > kvm->max_used_gsi)
> -		kvm->max_used_gsi = entry->gsi;
>  	return 0;
>  #else
>  	return -ENOSYS;
> @@ -1404,18 +1429,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
>  #endif
>  }
>
> +#ifdef KVM_CAP_IRQ_ROUTING
> +static inline void set_bit(uint32_t *buf, unsigned int bit)
> +{
> +	buf[bit / 32] |= 1U << (bit % 32);
> +}
> +
> +static inline void clear_bit(uint32_t *buf, unsigned int bit)
> +{
> +	buf[bit / 32] &= ~(1U << (bit % 32));
> +}
> +
> +static int kvm_find_free_gsi(kvm_context_t kvm)
> +{
> +	int i, bit, gsi;
> +	uint32_t *buf = kvm->used_gsi_bitmap;
> +
> +	for (i = 0; i < kvm->max_gsi / 32; i++) {
> +		bit = ffs(~buf[i]);
> +		if (!bit)
> +			continue;
> +
> +		gsi = bit - 1 + i * 32;
> +		set_bit(buf, gsi);
> +		return gsi;
> +	}
> +
> +	return -ENOSPC;
> +}
> +#endif
> +
>  int kvm_get_irq_route_gsi(kvm_context_t kvm)
>  {
> +	int gsi = -ENOSYS;
> +
>  #ifdef KVM_CAP_IRQ_ROUTING
> -	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
> -	    if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
> -                return kvm->max_used_gsi + 1;
> -            else
> -                return -ENOSPC;
> -        } else
> -            return KVM_IOAPIC_NUM_PINS;
> -#else
> -	return -ENOSYS;
> +	pthread_mutex_lock(&kvm->gsi_mutex);
> +	gsi = kvm_find_free_gsi(kvm);
> +	pthread_mutex_unlock(&kvm->gsi_mutex);
> +#endif
> +	return gsi;
> +}
> +
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
> +{
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	pthread_mutex_lock(&kvm->gsi_mutex);
> +	clear_bit(kvm->used_gsi_bitmap, gsi);
> +	pthread_mutex_unlock(&kvm->gsi_mutex);
>  #endif
>  }
>
> diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
> index 4821a1e..845bb8a 100644
> --- a/kvm/libkvm/libkvm.h
> +++ b/kvm/libkvm/libkvm.h
> @@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
>   */
>  int kvm_get_irq_route_gsi(kvm_context_t kvm);
>
> +/*!
> + * \brief Free used GSI number
> + *
> + * Free used GSI number acquired from kvm_get_irq_route_gsi()
> + *
> + * \param kvm Pointer to the current kvm_context
> + * \param gsi GSI number to free
> + */
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
> +
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
>  			   struct kvm_assigned_msix_nr *msix_nr);



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  3:30     ` Yang, Sheng
@ 2009-05-13  3:42       ` Alex Williamson
  2009-05-13  4:10         ` Alex Williamson
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-13  3:42 UTC (permalink / raw)
  To: Yang, Sheng; +Cc: kvm, mst

On Wed, 2009-05-13 at 11:30 +0800, Yang, Sheng wrote:
> On Wednesday 13 May 2009 06:07:15 Alex Williamson wrote:
> > We're currently using a counter to track the most recent GSI we've
> > handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> > assignment with a driver that regularly toggles the MSI enable bit.
> > This can mean only a few minutes of usable run time.  Instead, track
> > used GSIs in a bitmap.
> >
> > Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> > ---
> >
> >  v2: Added mutex to protect gsi bitmap
> >  v3: Updated for comments from Michael Tsirkin
> >      No longer depends on "[PATCH] kvm: device-assignment: Catch GSI
> > overflow"
> >
> >  hw/device-assignment.c  |    4 ++
> >  kvm/libkvm/kvm-common.h |    4 ++
> >  kvm/libkvm/libkvm.c     |   83
> > +++++++++++++++++++++++++++++++++++++++++------ kvm/libkvm/libkvm.h     |  
> > 10 ++++++
> >  4 files changed, 88 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> > index a7365c8..a6cc9b9 100644
> > --- a/hw/device-assignment.c
> > +++ b/hw/device-assignment.c
> > @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
> >  {
> >      int i;
> >
> > -    for (i = 0; i < dev->irq_entries_nr; i++)
> > +    for (i = 0; i < dev->irq_entries_nr; i++) {
> >          kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> > +        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> > +    }
> >      free(dev->entry);
> >      dev->entry = NULL;
> >      dev->irq_entries_nr = 0;
> > diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> > index 591fb53..4b3cb51 100644
> > --- a/kvm/libkvm/kvm-common.h
> > +++ b/kvm/libkvm/kvm-common.h
> > @@ -66,8 +66,10 @@ struct kvm_context {
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  	struct kvm_irq_routing *irq_routes;
> >  	int nr_allocated_irq_routes;
> > +	void *used_gsi_bitmap;
> > +	int max_gsi;
> > +	pthread_mutex_t gsi_mutex;
> >  #endif
> > -	int max_used_gsi;
> >  };
> >
> >  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> > diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> > index ba0a5d1..3d7ab75 100644
> > --- a/kvm/libkvm/libkvm.c
> > +++ b/kvm/libkvm/libkvm.c
> > @@ -35,6 +35,7 @@
> >  #include <errno.h>
> >  #include <sys/ioctl.h>
> >  #include <inttypes.h>
> > +#include <pthread.h>
> >  #include "libkvm.h"
> >
> >  #if defined(__x86_64__) || defined(__i386__)
> > @@ -65,6 +66,8 @@
> >  int kvm_abi = EXPECTED_KVM_API_VERSION;
> >  int kvm_page_size;
> >
> > +static inline void set_bit(uint32_t *buf, unsigned int bit);
> > +
> >  struct slot_info {
> >  	unsigned long phys_addr;
> >  	unsigned long len;
> > @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
> >  	int fd;
> >  	kvm_context_t kvm;
> >  	int r;
> > +#ifdef KVM_CAP_IRQ_ROUTING
> > +	int gsi_count, gsi_bytes, i;
> > +#endif
> >
> >  	fd = open("/dev/kvm", O_RDWR);
> >  	if (fd == -1) {
> > @@ -322,6 +328,27 @@ kvm_context_t kvm_init(struct kvm_callbacks
> > *callbacks, kvm->dirty_pages_log_all = 0;
> >  	kvm->no_irqchip_creation = 0;
> >  	kvm->no_pit_creation = 0;
> > +#ifdef KVM_CAP_IRQ_ROUTING
> > +	pthread_mutex_init(&kvm->gsi_mutex, NULL);
> > +
> > +	gsi_count = kvm_get_gsi_count(kvm);
> > +	/* Round up so we can search ints using ffs */
> > +	gsi_bytes = (gsi_count + 31) / 32;
> 
> CMIW, should it be gsi_bytes = (gsi_count + 7) / 8? This looks like bits-to-
> int. 

Oops, I missed a multiplier in there.  What you have would be correct
for rounding up to a byte, but we really want to round up to an int, so
I need to factor in a bytes/int, which gives me this:

gsi_bytes = ((gsi_count + 31) / 32) * 4;

Then the rest works out correctly.  Thanks,

Alex

> > +	kvm->used_gsi_bitmap = malloc(gsi_bytes);
> > +	if (!kvm->used_gsi_bitmap) {
> > +		pthread_mutex_unlock(&kvm->gsi_mutex);
> > +		goto out_close;
> > +	}
> > +	memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> > +	kvm->max_gsi = gsi_bytes * 8;
> 
> So max_gsi = gsi_count / 4?



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  3:42       ` Alex Williamson
@ 2009-05-13  4:10         ` Alex Williamson
  2009-05-13  4:15           ` Yang, Sheng
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-13  4:10 UTC (permalink / raw)
  To: Yang, Sheng; +Cc: kvm, mst

On Tue, 2009-05-12 at 21:42 -0600, Alex Williamson wrote:
> On Wed, 2009-05-13 at 11:30 +0800, Yang, Sheng wrote:
> > > +	kvm->used_gsi_bitmap = malloc(gsi_bytes);
> > > +	if (!kvm->used_gsi_bitmap) {
> > > +		pthread_mutex_unlock(&kvm->gsi_mutex);
> > > +		goto out_close;
> > > +	}
> > > +	memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> > > +	kvm->max_gsi = gsi_bytes * 8;
> > 
> > So max_gsi = gsi_count / 4?

kvm->max_gsi actually becomes the number of GSIs available in the
bitmap, which may be more than gsi_count if we rounded up.  We
preallocate GSIs between gsi_count and max_gsi to avoid using them.
This just lets us not need to special case testing whether a bit in the
last index is < gsi_count.  Am I overlooking anything here?  Thanks,

Alex



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  4:10         ` Alex Williamson
@ 2009-05-13  4:15           ` Yang, Sheng
  0 siblings, 0 replies; 56+ messages in thread
From: Yang, Sheng @ 2009-05-13  4:15 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, mst

On Wednesday 13 May 2009 12:10:34 Alex Williamson wrote:
> On Tue, 2009-05-12 at 21:42 -0600, Alex Williamson wrote:
> > On Wed, 2009-05-13 at 11:30 +0800, Yang, Sheng wrote:
> > > > +	kvm->used_gsi_bitmap = malloc(gsi_bytes);
> > > > +	if (!kvm->used_gsi_bitmap) {
> > > > +		pthread_mutex_unlock(&kvm->gsi_mutex);
> > > > +		goto out_close;
> > > > +	}
> > > > +	memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> > > > +	kvm->max_gsi = gsi_bytes * 8;
> > >
> > > So max_gsi = gsi_count / 4?
>
> kvm->max_gsi actually becomes the number of GSIs available in the
> bitmap, which may be more than gsi_count if we rounded up.  We
> preallocate GSIs between gsi_count and max_gsi to avoid using them.
> This just lets us not need to special case testing whether a bit in the
> last index is < gsi_count.  Am I overlooking anything here?  Thanks,
>
Oh, I understand that, and I just follow the logic of last comment here(which 
"gsi_bytes = (gsi_count + 31) / 32" )... Sorry for confusion...

-- 
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 22:07   ` [PATCH v3] " Alex Williamson
  2009-05-13  3:30     ` Yang, Sheng
@ 2009-05-13  4:41     ` Alex Williamson
  2009-05-13  4:58       ` Yang, Sheng
                         ` (3 more replies)
  2009-05-13  7:03     ` [PATCH v3] " Michael S. Tsirkin
  2009-05-13  7:04     ` Michael S. Tsirkin
  3 siblings, 4 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-13  4:41 UTC (permalink / raw)
  To: kvm, sheng.yang, mst; +Cc: alex.williamson

We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
---

 v2: Added mutex to protect gsi bitmap
 v3: Updated for comments from Michael Tsirkin
     No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"
 v4: Fix gsi_bytes calculation noted by Sheng Yang

 hw/device-assignment.c  |    4 ++
 kvm/libkvm/kvm-common.h |    4 ++
 kvm/libkvm/libkvm.c     |   81 +++++++++++++++++++++++++++++++++++++++++------
 kvm/libkvm/libkvm.h     |   10 ++++++
 4 files changed, 86 insertions(+), 13 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a7365c8..a6cc9b9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
     int i;
 
-    for (i = 0; i < dev->irq_entries_nr; i++)
+    for (i = 0; i < dev->irq_entries_nr; i++) {
         kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+    }
     free(dev->entry);
     dev->entry = NULL;
     dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..4b3cb51 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -66,8 +66,10 @@ struct kvm_context {
 #ifdef KVM_CAP_IRQ_ROUTING
 	struct kvm_irq_routing *irq_routes;
 	int nr_allocated_irq_routes;
+	void *used_gsi_bitmap;
+	int max_gsi;
+	pthread_mutex_t gsi_mutex;
 #endif
-	int max_used_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..3eaa120 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -35,6 +35,7 @@
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <inttypes.h>
+#include <pthread.h>
 #include "libkvm.h"
 
 #if defined(__x86_64__) || defined(__i386__)
@@ -65,6 +66,8 @@
 int kvm_abi = EXPECTED_KVM_API_VERSION;
 int kvm_page_size;
 
+static inline void set_bit(uint32_t *buf, unsigned int bit);
+
 struct slot_info {
 	unsigned long phys_addr;
 	unsigned long len;
@@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	int fd;
 	kvm_context_t kvm;
 	int r;
+#ifdef KVM_CAP_IRQ_ROUTING
+	int gsi_count, gsi_bytes, i;
+#endif
 
 	fd = open("/dev/kvm", O_RDWR);
 	if (fd == -1) {
@@ -322,6 +328,25 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	kvm->dirty_pages_log_all = 0;
 	kvm->no_irqchip_creation = 0;
 	kvm->no_pit_creation = 0;
+#ifdef KVM_CAP_IRQ_ROUTING
+	pthread_mutex_init(&kvm->gsi_mutex, NULL);
+
+	gsi_count = kvm_get_gsi_count(kvm);
+	/* Round up so we can search ints using ffs */
+	gsi_bytes = ((gsi_count + 31) / 32) * 4;
+	kvm->used_gsi_bitmap = malloc(gsi_bytes);
+	if (!kvm->used_gsi_bitmap)
+		goto out_close;
+	memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
+	kvm->max_gsi = gsi_bytes * 8;
+
+	/* Mark all the IOAPIC pin GSIs and any over-allocated
+	 * GSIs as already in use. */
+	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+		set_bit(kvm->used_gsi_bitmap, i);
+	for (i = gsi_count; i < kvm->max_gsi; i++)
+		set_bit(kvm->used_gsi_bitmap, i);
+#endif
 
 	return kvm;
  out_close:
@@ -1298,8 +1323,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 	new->flags = entry->flags;
 	new->u = entry->u;
 
-	if (entry->gsi > kvm->max_used_gsi)
-		kvm->max_used_gsi = entry->gsi;
 	return 0;
 #else
 	return -ENOSYS;
@@ -1404,18 +1427,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+#ifdef KVM_CAP_IRQ_ROUTING
+static inline void set_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] |= 1U << (bit % 32);
+}
+
+static inline void clear_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] &= ~(1U << (bit % 32));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+	int i, bit, gsi;
+	uint32_t *buf = kvm->used_gsi_bitmap;
+
+	for (i = 0; i < kvm->max_gsi / 32; i++) {
+		bit = ffs(~buf[i]);
+		if (!bit)
+			continue;
+
+		gsi = bit - 1 + i * 32;
+		set_bit(buf, gsi);
+		return gsi;
+	}
+
+	return -ENOSPC;
+}
+#endif
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
+	int gsi = -ENOSYS;
+
 #ifdef KVM_CAP_IRQ_ROUTING
-	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-	    if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
-                return kvm->max_used_gsi + 1;
-            else
-                return -ENOSPC;
-        } else
-            return KVM_IOAPIC_NUM_PINS;
-#else
-	return -ENOSYS;
+	pthread_mutex_lock(&kvm->gsi_mutex);
+	gsi = kvm_find_free_gsi(kvm);
+	pthread_mutex_unlock(&kvm->gsi_mutex);
+#endif
+	return gsi;
+}
+ 
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+	pthread_mutex_lock(&kvm->gsi_mutex);
+	clear_bit(kvm->used_gsi_bitmap, gsi);
+	pthread_mutex_unlock(&kvm->gsi_mutex);
 #endif
 }
 
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 4821a1e..845bb8a 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Free used GSI number
+ *
+ * Free used GSI number acquired from kvm_get_irq_route_gsi()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI number to free
+ */
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
 			   struct kvm_assigned_msix_nr *msix_nr);


^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  4:41     ` [PATCH v4] " Alex Williamson
@ 2009-05-13  4:58       ` Yang, Sheng
  2009-05-13  9:47       ` Avi Kivity
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 56+ messages in thread
From: Yang, Sheng @ 2009-05-13  4:58 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, mst

On Wednesday 13 May 2009 12:41:29 Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
>
> Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> ---

Acked.

-- 
regards
Yang, Sheng

>
>  v2: Added mutex to protect gsi bitmap
>  v3: Updated for comments from Michael Tsirkin
>      No longer depends on "[PATCH] kvm: device-assignment: Catch GSI
> overflow" v4: Fix gsi_bytes calculation noted by Sheng Yang
>
>  hw/device-assignment.c  |    4 ++
>  kvm/libkvm/kvm-common.h |    4 ++
>  kvm/libkvm/libkvm.c     |   81
> +++++++++++++++++++++++++++++++++++++++++------ kvm/libkvm/libkvm.h     |  
> 10 ++++++
>  4 files changed, 86 insertions(+), 13 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index a7365c8..a6cc9b9 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
>  {
>      int i;
>
> -    for (i = 0; i < dev->irq_entries_nr; i++)
> +    for (i = 0; i < dev->irq_entries_nr; i++) {
>          kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> +        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> +    }
>      free(dev->entry);
>      dev->entry = NULL;
>      dev->irq_entries_nr = 0;
> diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> index 591fb53..4b3cb51 100644
> --- a/kvm/libkvm/kvm-common.h
> +++ b/kvm/libkvm/kvm-common.h
> @@ -66,8 +66,10 @@ struct kvm_context {
>  #ifdef KVM_CAP_IRQ_ROUTING
>  	struct kvm_irq_routing *irq_routes;
>  	int nr_allocated_irq_routes;
> +	void *used_gsi_bitmap;
> +	int max_gsi;
> +	pthread_mutex_t gsi_mutex;
>  #endif
> -	int max_used_gsi;
>  };
>
>  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> index ba0a5d1..3eaa120 100644
> --- a/kvm/libkvm/libkvm.c
> +++ b/kvm/libkvm/libkvm.c
> @@ -35,6 +35,7 @@
>  #include <errno.h>
>  #include <sys/ioctl.h>
>  #include <inttypes.h>
> +#include <pthread.h>
>  #include "libkvm.h"
>
>  #if defined(__x86_64__) || defined(__i386__)
> @@ -65,6 +66,8 @@
>  int kvm_abi = EXPECTED_KVM_API_VERSION;
>  int kvm_page_size;
>
> +static inline void set_bit(uint32_t *buf, unsigned int bit);
> +
>  struct slot_info {
>  	unsigned long phys_addr;
>  	unsigned long len;
> @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
>  	int fd;
>  	kvm_context_t kvm;
>  	int r;
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	int gsi_count, gsi_bytes, i;
> +#endif
>
>  	fd = open("/dev/kvm", O_RDWR);
>  	if (fd == -1) {
> @@ -322,6 +328,25 @@ kvm_context_t kvm_init(struct kvm_callbacks
> *callbacks, kvm->dirty_pages_log_all = 0;
>  	kvm->no_irqchip_creation = 0;
>  	kvm->no_pit_creation = 0;
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	pthread_mutex_init(&kvm->gsi_mutex, NULL);
> +
> +	gsi_count = kvm_get_gsi_count(kvm);
> +	/* Round up so we can search ints using ffs */
> +	gsi_bytes = ((gsi_count + 31) / 32) * 4;
> +	kvm->used_gsi_bitmap = malloc(gsi_bytes);
> +	if (!kvm->used_gsi_bitmap)
> +		goto out_close;
> +	memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> +	kvm->max_gsi = gsi_bytes * 8;
> +
> +	/* Mark all the IOAPIC pin GSIs and any over-allocated
> +	 * GSIs as already in use. */
> +	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
> +		set_bit(kvm->used_gsi_bitmap, i);
> +	for (i = gsi_count; i < kvm->max_gsi; i++)
> +		set_bit(kvm->used_gsi_bitmap, i);
> +#endif
>
>  	return kvm;
>   out_close:
> @@ -1298,8 +1323,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
>  	new->flags = entry->flags;
>  	new->u = entry->u;
>
> -	if (entry->gsi > kvm->max_used_gsi)
> -		kvm->max_used_gsi = entry->gsi;
>  	return 0;
>  #else
>  	return -ENOSYS;
> @@ -1404,18 +1427,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
>  #endif
>  }
>
> +#ifdef KVM_CAP_IRQ_ROUTING
> +static inline void set_bit(uint32_t *buf, unsigned int bit)
> +{
> +	buf[bit / 32] |= 1U << (bit % 32);
> +}
> +
> +static inline void clear_bit(uint32_t *buf, unsigned int bit)
> +{
> +	buf[bit / 32] &= ~(1U << (bit % 32));
> +}
> +
> +static int kvm_find_free_gsi(kvm_context_t kvm)
> +{
> +	int i, bit, gsi;
> +	uint32_t *buf = kvm->used_gsi_bitmap;
> +
> +	for (i = 0; i < kvm->max_gsi / 32; i++) {
> +		bit = ffs(~buf[i]);
> +		if (!bit)
> +			continue;
> +
> +		gsi = bit - 1 + i * 32;
> +		set_bit(buf, gsi);
> +		return gsi;
> +	}
> +
> +	return -ENOSPC;
> +}
> +#endif
> +
>  int kvm_get_irq_route_gsi(kvm_context_t kvm)
>  {
> +	int gsi = -ENOSYS;
> +
>  #ifdef KVM_CAP_IRQ_ROUTING
> -	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
> -	    if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
> -                return kvm->max_used_gsi + 1;
> -            else
> -                return -ENOSPC;
> -        } else
> -            return KVM_IOAPIC_NUM_PINS;
> -#else
> -	return -ENOSYS;
> +	pthread_mutex_lock(&kvm->gsi_mutex);
> +	gsi = kvm_find_free_gsi(kvm);
> +	pthread_mutex_unlock(&kvm->gsi_mutex);
> +#endif
> +	return gsi;
> +}
> +
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
> +{
> +#ifdef KVM_CAP_IRQ_ROUTING
> +	pthread_mutex_lock(&kvm->gsi_mutex);
> +	clear_bit(kvm->used_gsi_bitmap, gsi);
> +	pthread_mutex_unlock(&kvm->gsi_mutex);
>  #endif
>  }
>
> diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
> index 4821a1e..845bb8a 100644
> --- a/kvm/libkvm/libkvm.h
> +++ b/kvm/libkvm/libkvm.h
> @@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
>   */
>  int kvm_get_irq_route_gsi(kvm_context_t kvm);
>
> +/*!
> + * \brief Free used GSI number
> + *
> + * Free used GSI number acquired from kvm_get_irq_route_gsi()
> + *
> + * \param kvm Pointer to the current kvm_context
> + * \param gsi GSI number to free
> + */
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
> +
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
>  			   struct kvm_assigned_msix_nr *msix_nr);



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 22:07   ` [PATCH v3] " Alex Williamson
  2009-05-13  3:30     ` Yang, Sheng
  2009-05-13  4:41     ` [PATCH v4] " Alex Williamson
@ 2009-05-13  7:03     ` Michael S. Tsirkin
  2009-05-13 12:15       ` Alex Williamson
  2009-05-13  7:04     ` Michael S. Tsirkin
  3 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13  7:03 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang

On Tue, May 12, 2009 at 04:07:15PM -0600, Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.

BTW, dwhich driver does that? Any idea why?

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-12 22:07   ` [PATCH v3] " Alex Williamson
                       ` (2 preceding siblings ...)
  2009-05-13  7:03     ` [PATCH v3] " Michael S. Tsirkin
@ 2009-05-13  7:04     ` Michael S. Tsirkin
  2009-05-13 12:19       ` Alex Williamson
  3 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13  7:04 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang

On Tue, May 12, 2009 at 04:07:15PM -0600, Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
> 
> Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> ---
> 
>  v2: Added mutex to protect gsi bitmap
>  v3: Updated for comments from Michael Tsirkin
>      No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"
> 
>  hw/device-assignment.c  |    4 ++
>  kvm/libkvm/kvm-common.h |    4 ++
>  kvm/libkvm/libkvm.c     |   83 +++++++++++++++++++++++++++++++++++++++++------
>  kvm/libkvm/libkvm.h     |   10 ++++++
>  4 files changed, 88 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index a7365c8..a6cc9b9 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
>  {
>      int i;
>  
> -    for (i = 0; i < dev->irq_entries_nr; i++)
> +    for (i = 0; i < dev->irq_entries_nr; i++) {
>          kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> +        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> +    }
>      free(dev->entry);
>      dev->entry = NULL;
>      dev->irq_entries_nr = 0;
> diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> index 591fb53..4b3cb51 100644
> --- a/kvm/libkvm/kvm-common.h
> +++ b/kvm/libkvm/kvm-common.h
> @@ -66,8 +66,10 @@ struct kvm_context {
>  #ifdef KVM_CAP_IRQ_ROUTING
>  	struct kvm_irq_routing *irq_routes;
>  	int nr_allocated_irq_routes;
> +	void *used_gsi_bitmap;
> +	int max_gsi;
> +	pthread_mutex_t gsi_mutex;
>  #endif
> -	int max_used_gsi;
>  };
>  
>  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> index ba0a5d1..3d7ab75 100644
> --- a/kvm/libkvm/libkvm.c
> +++ b/kvm/libkvm/libkvm.c
> @@ -35,6 +35,7 @@
>  #include <errno.h>
>  #include <sys/ioctl.h>
>  #include <inttypes.h>
> +#include <pthread.h>
>  #include "libkvm.h"
>  
>  #if defined(__x86_64__) || defined(__i386__)
> @@ -65,6 +66,8 @@
>  int kvm_abi = EXPECTED_KVM_API_VERSION;
>  int kvm_page_size;
>  
> +static inline void set_bit(uint32_t *buf, unsigned int bit);
> +
>  struct slot_info {
>  	unsigned long phys_addr;
>  	unsigned long len;
> @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
>  	int fd;
>  	kvm_context_t kvm;
>  	int r;
> +#ifdef KVM_CAP_IRQ_ROUTING

Let's kill all these ifdefs. Or at least, let's not add them.

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  4:41     ` [PATCH v4] " Alex Williamson
  2009-05-13  4:58       ` Yang, Sheng
@ 2009-05-13  9:47       ` Avi Kivity
  2009-05-13 12:28         ` Alex Williamson
  2009-05-13 14:32       ` Michael S. Tsirkin
  2009-05-13 15:13       ` [PATCH v5] " Alex Williamson
  3 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-13  9:47 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, mst

Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
>
> Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> ---
>
>  v2: Added mutex to protect gsi bitmap
>   

Why is the mutex needed?  We already have mutex protection in qemu.

How often does the driver enable/disable the MSI (and, do you now why)?  
If it's often enough it may justify kernel support.  (We'll need this 
patch in any case for kernels without this new support).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  7:03     ` [PATCH v3] " Michael S. Tsirkin
@ 2009-05-13 12:15       ` Alex Williamson
  0 siblings, 0 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 12:15 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, sheng.yang

On Wed, 2009-05-13 at 10:03 +0300, Michael S. Tsirkin wrote:
> On Tue, May 12, 2009 at 04:07:15PM -0600, Alex Williamson wrote:
> > We're currently using a counter to track the most recent GSI we've
> > handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> > assignment with a driver that regularly toggles the MSI enable bit.
> 
> BTW, dwhich driver does that? Any idea why?

I've seen it from both e1000e and qla2xxx.  I assumed it was some kind
of interrupt mitigation since the devices seem to work fine otherwise.

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  7:04     ` Michael S. Tsirkin
@ 2009-05-13 12:19       ` Alex Williamson
  2009-05-13 14:25         ` Michael S. Tsirkin
  2009-05-17 20:49         ` Avi Kivity
  0 siblings, 2 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 12:19 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, sheng.yang

On Wed, 2009-05-13 at 10:04 +0300, Michael S. Tsirkin wrote:
> On Tue, May 12, 2009 at 04:07:15PM -0600, Alex Williamson wrote:
> > @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
> >  	int fd;
> >  	kvm_context_t kvm;
> >  	int r;
> > +#ifdef KVM_CAP_IRQ_ROUTING
> 
> Let's kill all these ifdefs. Or at least, let's not add them.

AFAICT, they're still used both for builds against older kernels and
architectures that don't support it.  Hollis just added the one around
kvm_get_irq_route_gsi() 10 days ago to fix ppc build.  Has it since been
deprecated?  Thanks,

Alex



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  9:47       ` Avi Kivity
@ 2009-05-13 12:28         ` Alex Williamson
  2009-05-13 12:35           ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 12:28 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, sheng.yang, mst

On Wed, 2009-05-13 at 12:47 +0300, Avi Kivity wrote:
> Alex Williamson wrote:
> > We're currently using a counter to track the most recent GSI we've
> > handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> > assignment with a driver that regularly toggles the MSI enable bit.
> > This can mean only a few minutes of usable run time.  Instead, track
> > used GSIs in a bitmap.
> >
> > Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> > ---
> >
> >  v2: Added mutex to protect gsi bitmap
> >   
> 
> Why is the mutex needed?  We already have mutex protection in qemu.

If it's unneeded, I'll happily remove it.  I was assuming in a guest
with multiple devices these could come in parallel, but maybe the guest
is already serialized for config space accesses via cfc/cf8.

> How often does the driver enable/disable the MSI (and, do you now why)?  
> If it's often enough it may justify kernel support.  (We'll need this 
> patch in any case for kernels without this new support).

Seems like multiple times per second.  I don't know why.  Now I'm
starting to get curious why nobody else seems to be hitting this.  I'm
seeing it on an e1000e NIC and Qlogic fibre channel.  Is everyone else
using MSI-X or regular interrupts vs MSI?

Alex



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 12:28         ` Alex Williamson
@ 2009-05-13 12:35           ` Avi Kivity
  2009-05-13 12:55             ` Alex Williamson
  0 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-13 12:35 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, mst

Alex Williamson wrote:
> On Wed, 2009-05-13 at 12:47 +0300, Avi Kivity wrote:
>   
>> Alex Williamson wrote:
>>     
>>> We're currently using a counter to track the most recent GSI we've
>>> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
>>> assignment with a driver that regularly toggles the MSI enable bit.
>>> This can mean only a few minutes of usable run time.  Instead, track
>>> used GSIs in a bitmap.
>>>
>>> Signed-off-by: Alex Williamson <alex.williamson@hp.com>
>>> ---
>>>
>>>  v2: Added mutex to protect gsi bitmap
>>>   
>>>       
>> Why is the mutex needed?  We already have mutex protection in qemu.
>>     
>
> If it's unneeded, I'll happily remove it.  I was assuming in a guest
> with multiple devices these could come in parallel, but maybe the guest
> is already serialized for config space accesses via cfc/cf8.
>   

The guest may or may not be serialized; we can't rely on that.  But qemu 
is, and we can.

>   
>> How often does the driver enable/disable the MSI (and, do you now why)?  
>> If it's often enough it may justify kernel support.  (We'll need this 
>> patch in any case for kernels without this new support).
>>     
>
> Seems like multiple times per second.  I don't know why.  Now I'm
> starting to get curious why nobody else seems to be hitting this.  I'm
> seeing it on an e1000e NIC and Qlogic fibre channel.  Is everyone else
> using MSI-X or regular interrupts vs MSI?
>   

When you say "multiple times", it is several, or a lot more?

Maybe it is NAPI?

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 12:35           ` Avi Kivity
@ 2009-05-13 12:55             ` Alex Williamson
  2009-05-13 13:00               ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 12:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, sheng.yang, mst

On Wed, 2009-05-13 at 15:35 +0300, Avi Kivity wrote:
> Alex Williamson wrote:
> > On Wed, 2009-05-13 at 12:47 +0300, Avi Kivity wrote:
> >   
> >> Alex Williamson wrote:
> >>     
> >>> We're currently using a counter to track the most recent GSI we've
> >>> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> >>> assignment with a driver that regularly toggles the MSI enable bit.
> >>> This can mean only a few minutes of usable run time.  Instead, track
> >>> used GSIs in a bitmap.
> >>>
> >>> Signed-off-by: Alex Williamson <alex.williamson@hp.com>
> >>> ---
> >>>
> >>>  v2: Added mutex to protect gsi bitmap
> >>>   
> >>>       
> >> Why is the mutex needed?  We already have mutex protection in qemu.
> >>     
> >
> > If it's unneeded, I'll happily remove it.  I was assuming in a guest
> > with multiple devices these could come in parallel, but maybe the guest
> > is already serialized for config space accesses via cfc/cf8.
> >   
> 
> The guest may or may not be serialized; we can't rely on that.  But qemu 
> is, and we can.

Ok, I'll drop the mutex here.
   
> >> How often does the driver enable/disable the MSI (and, do you now why)?  
> >> If it's often enough it may justify kernel support.  (We'll need this 
> >> patch in any case for kernels without this new support).
> >>     
> >
> > Seems like multiple times per second.  I don't know why.  Now I'm
> > starting to get curious why nobody else seems to be hitting this.  I'm
> > seeing it on an e1000e NIC and Qlogic fibre channel.  Is everyone else
> > using MSI-X or regular interrupts vs MSI?
> >   
> 
> When you say "multiple times", it is several, or a lot more?
> 
> Maybe it is NAPI?

The system would run out of the ~1000 available GSIs in a minute or two
with just an e1000e available to the guest.  So that's something on the
order of 10/s.  This also causes a printk in the host ever time the
interrupt in enabled, which can't help performance and gets pretty
annoying for syslog.  I was guessing some kind of interrupt mitigation,
such as NAPI, but a qlogic FC card seems to do it too (seemingly at a
slower rate).

Alex



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 12:55             ` Alex Williamson
@ 2009-05-13 13:00               ` Avi Kivity
  2009-05-13 13:11                 ` Alex Williamson
  0 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-13 13:00 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, mst

Alex Williamson wrote:
>>
>> When you say "multiple times", it is several, or a lot more?
>>
>> Maybe it is NAPI?
>>     
>
> The system would run out of the ~1000 available GSIs in a minute or two
> with just an e1000e available to the guest.  So that's something on the
> order of 10/s.  This also causes a printk in the host ever time the
> interrupt in enabled, which can't help performance and gets pretty
> annoying for syslog.  I was guessing some kind of interrupt mitigation,
> such as NAPI, but a qlogic FC card seems to do it too (seemingly at a
> slower rate).
>   

I see.  And what is the path by which it is disabled?  The mask bit in 
the MSI entry?

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 13:00               ` Avi Kivity
@ 2009-05-13 13:11                 ` Alex Williamson
  2009-05-13 13:55                   ` Michael S. Tsirkin
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 13:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, sheng.yang, mst

On Wed, 2009-05-13 at 16:00 +0300, Avi Kivity wrote:
> Alex Williamson wrote:
> >>
> >> When you say "multiple times", it is several, or a lot more?
> >>
> >> Maybe it is NAPI?
> >>     
> >
> > The system would run out of the ~1000 available GSIs in a minute or two
> > with just an e1000e available to the guest.  So that's something on the
> > order of 10/s.  This also causes a printk in the host ever time the
> > interrupt in enabled, which can't help performance and gets pretty
> > annoying for syslog.  I was guessing some kind of interrupt mitigation,
> > such as NAPI, but a qlogic FC card seems to do it too (seemingly at a
> > slower rate).
> >   
> 
> I see.  And what is the path by which it is disabled?  The mask bit in 
> the MSI entry?

Yes, I believe the only path is via a write to the MSI capability in the
PCI config space.

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 13:11                 ` Alex Williamson
@ 2009-05-13 13:55                   ` Michael S. Tsirkin
  2009-05-13 14:15                     ` Alex Williamson
  0 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13 13:55 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Avi Kivity, kvm, sheng.yang

On Wed, May 13, 2009 at 07:11:16AM -0600, Alex Williamson wrote:
> On Wed, 2009-05-13 at 16:00 +0300, Avi Kivity wrote:
> > Alex Williamson wrote:
> > >>
> > >> When you say "multiple times", it is several, or a lot more?
> > >>
> > >> Maybe it is NAPI?
> > >>     
> > >
> > > The system would run out of the ~1000 available GSIs in a minute or two
> > > with just an e1000e available to the guest.  So that's something on the
> > > order of 10/s.  This also causes a printk in the host ever time the
> > > interrupt in enabled, which can't help performance and gets pretty
> > > annoying for syslog.  I was guessing some kind of interrupt mitigation,
> > > such as NAPI, but a qlogic FC card seems to do it too (seemingly at a
> > > slower rate).
> > >   
> > 
> > I see.  And what is the path by which it is disabled?  The mask bit in 
> > the MSI entry?
> 
> Yes, I believe the only path is via a write to the MSI capability in the
> PCI config space.
> 
> Alex

Very surprising: I haven't seen any driver disable MSI expect on device
destructor path. Is this a linux guest?

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 13:55                   ` Michael S. Tsirkin
@ 2009-05-13 14:15                     ` Alex Williamson
  2009-05-13 14:30                       ` Michael S. Tsirkin
  2009-05-13 14:33                       ` Alex Williamson
  0 siblings, 2 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 14:15 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm, sheng.yang

On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
> On Wed, May 13, 2009 at 07:11:16AM -0600, Alex Williamson wrote:
> > On Wed, 2009-05-13 at 16:00 +0300, Avi Kivity wrote:
> > > Alex Williamson wrote:
> > > >>
> > > >> When you say "multiple times", it is several, or a lot more?
> > > >>
> > > >> Maybe it is NAPI?
> > > >>     
> > > >
> > > > The system would run out of the ~1000 available GSIs in a minute or two
> > > > with just an e1000e available to the guest.  So that's something on the
> > > > order of 10/s.  This also causes a printk in the host ever time the
> > > > interrupt in enabled, which can't help performance and gets pretty
> > > > annoying for syslog.  I was guessing some kind of interrupt mitigation,
> > > > such as NAPI, but a qlogic FC card seems to do it too (seemingly at a
> > > > slower rate).
> > > >   
> > > 
> > > I see.  And what is the path by which it is disabled?  The mask bit in 
> > > the MSI entry?
> > 
> > Yes, I believe the only path is via a write to the MSI capability in the
> > PCI config space.
> > 
> > Alex
> 
> Very surprising: I haven't seen any driver disable MSI expect on device
> destructor path. Is this a linux guest?

Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
upstream kernels and try to figure out why it's doing it.

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 12:19       ` Alex Williamson
@ 2009-05-13 14:25         ` Michael S. Tsirkin
  2009-05-17 20:49         ` Avi Kivity
  1 sibling, 0 replies; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13 14:25 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang

On Wed, May 13, 2009 at 06:19:35AM -0600, Alex Williamson wrote:
> On Wed, 2009-05-13 at 10:04 +0300, Michael S. Tsirkin wrote:
> > On Tue, May 12, 2009 at 04:07:15PM -0600, Alex Williamson wrote:
> > > @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
> > >  	int fd;
> > >  	kvm_context_t kvm;
> > >  	int r;
> > > +#ifdef KVM_CAP_IRQ_ROUTING
> > 
> > Let's kill all these ifdefs. Or at least, let's not add them.
> 
> AFAICT, they're still used both for builds against older kernels and
> architectures that don't support it.

architectures only, I think. you don't know what kernel you will
run on, at build time.

> Hollis just added the one around
> kvm_get_irq_route_gsi() 10 days ago to fix ppc build.  Has it since been
> deprecated?  Thanks,
> 
> Alex
> 

These should be local to libkvm I think.
I think kvm_get_gsi_count already returns an error on ppc,
so things should work without ifdefs.


-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 14:15                     ` Alex Williamson
@ 2009-05-13 14:30                       ` Michael S. Tsirkin
  2009-05-13 14:33                       ` Alex Williamson
  1 sibling, 0 replies; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13 14:30 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Avi Kivity, kvm, sheng.yang

On Wed, May 13, 2009 at 08:15:29AM -0600, Alex Williamson wrote:
> On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
> > On Wed, May 13, 2009 at 07:11:16AM -0600, Alex Williamson wrote:
> > > On Wed, 2009-05-13 at 16:00 +0300, Avi Kivity wrote:
> > > > Alex Williamson wrote:
> > > > >>
> > > > >> When you say "multiple times", it is several, or a lot more?
> > > > >>
> > > > >> Maybe it is NAPI?
> > > > >>     
> > > > >
> > > > > The system would run out of the ~1000 available GSIs in a minute or two
> > > > > with just an e1000e available to the guest.  So that's something on the
> > > > > order of 10/s.  This also causes a printk in the host ever time the
> > > > > interrupt in enabled, which can't help performance and gets pretty
> > > > > annoying for syslog.  I was guessing some kind of interrupt mitigation,
> > > > > such as NAPI, but a qlogic FC card seems to do it too (seemingly at a
> > > > > slower rate).
> > > > >   
> > > > 
> > > > I see.  And what is the path by which it is disabled?  The mask bit in 
> > > > the MSI entry?
> > > 
> > > Yes, I believe the only path is via a write to the MSI capability in the
> > > PCI config space.
> > > 
> > > Alex
> > 
> > Very surprising: I haven't seen any driver disable MSI expect on device
> > destructor path. Is this a linux guest?
> 
> Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
> upstream kernels and try to figure out why it's doing it.
> 
> Alex

Maybe power management powers down the card? Why would that be?
Or maybe it detects some error in emulation and resets the card?

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  4:41     ` [PATCH v4] " Alex Williamson
  2009-05-13  4:58       ` Yang, Sheng
  2009-05-13  9:47       ` Avi Kivity
@ 2009-05-13 14:32       ` Michael S. Tsirkin
  2009-05-13 15:13       ` [PATCH v5] " Alex Williamson
  3 siblings, 0 replies; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13 14:32 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang

On Tue, May 12, 2009 at 10:41:29PM -0600, Alex Williamson wrote:
> +	gsi_count = kvm_get_gsi_count(kvm);
> +	/* Round up so we can search ints using ffs */
> +	gsi_bytes = ((gsi_count + 31) / 32) * 4;
> +	kvm->used_gsi_bitmap = malloc(gsi_bytes);

What happens on error in kvm_get_gsi_count?
gsi_count will be negative ..

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 14:15                     ` Alex Williamson
  2009-05-13 14:30                       ` Michael S. Tsirkin
@ 2009-05-13 14:33                       ` Alex Williamson
  2009-05-13 23:07                         ` Alex Williamson
  1 sibling, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 14:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm, sheng.yang

On Wed, 2009-05-13 at 08:15 -0600, Alex Williamson wrote:
> On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
> > On Wed, May 13, 2009 at 07:11:16AM -0600, Alex Williamson wrote:
> > > On Wed, 2009-05-13 at 16:00 +0300, Avi Kivity wrote:
> > > > Alex Williamson wrote:
> > > > >>
> > > > >> When you say "multiple times", it is several, or a lot more?
> > > > >>
> > > > >> Maybe it is NAPI?
> > > > >>     
> > > > >
> > > > > The system would run out of the ~1000 available GSIs in a minute or two
> > > > > with just an e1000e available to the guest.  So that's something on the
> > > > > order of 10/s.  This also causes a printk in the host ever time the
> > > > > interrupt in enabled, which can't help performance and gets pretty
> > > > > annoying for syslog.  I was guessing some kind of interrupt mitigation,
> > > > > such as NAPI, but a qlogic FC card seems to do it too (seemingly at a
> > > > > slower rate).
> > > > >   
> > > > 
> > > > I see.  And what is the path by which it is disabled?  The mask bit in 
> > > > the MSI entry?
> > > 
> > > Yes, I believe the only path is via a write to the MSI capability in the
> > > PCI config space.
> > > 
> > > Alex
> > 
> > Very surprising: I haven't seen any driver disable MSI expect on device
> > destructor path. Is this a linux guest?
> 
> Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
> upstream kernels and try to figure out why it's doing it.

Updating the guest to 2.6.29 seems to fix the interrupt toggling.  So
it's either something in older kernels or something debian introduced,
but that seems unlikely.

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCH v5] kvm: Use a bitmap for tracking used GSIs
  2009-05-13  4:41     ` [PATCH v4] " Alex Williamson
                         ` (2 preceding siblings ...)
  2009-05-13 14:32       ` Michael S. Tsirkin
@ 2009-05-13 15:13       ` Alex Williamson
  2009-05-13 16:05         ` Michael S. Tsirkin
  2009-05-13 17:28         ` [PATCH v6] " Alex Williamson
  3 siblings, 2 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 15:13 UTC (permalink / raw)
  To: kvm, sheng.yang, mst; +Cc: alex.williamson

We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
---

 v2: Added mutex to protect gsi bitmap
 v3: Updated for comments from Michael Tsirkin
     No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"
 v4: Fix gsi_bytes calculation noted by Sheng Yang
 v5: Remove mutex per Avi
     Fix negative gsi_count path per Michael
     Remove KVM_CAP_IRQ_ROUTING per Michael, ppc should still be protected
         by the KVM_IOAPIC_NUM_PINS check

 hw/device-assignment.c  |    4 ++-
 kvm/libkvm/kvm-common.h |    3 +-
 kvm/libkvm/libkvm.c     |   74 ++++++++++++++++++++++++++++++++++++++---------
 kvm/libkvm/libkvm.h     |   10 ++++++
 4 files changed, 75 insertions(+), 16 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a7365c8..a6cc9b9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
     int i;
 
-    for (i = 0; i < dev->irq_entries_nr; i++)
+    for (i = 0; i < dev->irq_entries_nr; i++) {
         kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+    }
     free(dev->entry);
     dev->entry = NULL;
     dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..c95c591 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -67,7 +67,8 @@ struct kvm_context {
 	struct kvm_irq_routing *irq_routes;
 	int nr_allocated_irq_routes;
 #endif
-	int max_used_gsi;
+	void *used_gsi_bitmap;
+	int max_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..74fb59b 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -61,10 +61,13 @@
 #define DPRINTF(fmt, args...) do {} while (0)
 #endif
 
+#define min(x,y) ((x) < (y) ? (x) : (y))
 
 int kvm_abi = EXPECTED_KVM_API_VERSION;
 int kvm_page_size;
 
+static inline void set_bit(uint32_t *buf, unsigned int bit);
+
 struct slot_info {
 	unsigned long phys_addr;
 	unsigned long len;
@@ -285,7 +288,7 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 {
 	int fd;
 	kvm_context_t kvm;
-	int r;
+	int r, gsi_count;
 
 	fd = open("/dev/kvm", O_RDWR);
 	if (fd == -1) {
@@ -323,6 +326,28 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	kvm->no_irqchip_creation = 0;
 	kvm->no_pit_creation = 0;
 
+	gsi_count = kvm_get_gsi_count(kvm);
+	if (gsi_count > 0) {
+		int gsi_bytes, i;
+
+		/* Round up so we can search ints using ffs */
+		gsi_bytes = ((gsi_count + 31) / 32) * 4;
+		kvm->used_gsi_bitmap = malloc(gsi_bytes);
+		if (!kvm->used_gsi_bitmap)
+			goto out_close;
+		memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
+		kvm->max_gsi = gsi_bytes * 8;
+
+		/* Mark all the IOAPIC pin GSIs and any over-allocated
+	 	* GSIs as already in use. */
+#ifdef KVM_IOAPIC_NUM_PINS
+		for (i = 0; i < min(KVM_IOAPIC_NUM_PINS, gsi_count); i++)
+			set_bit(kvm->used_gsi_bitmap, i);
+#endif
+		for (i = gsi_count; i < kvm->max_gsi; i++)
+			set_bit(kvm->used_gsi_bitmap, i);
+	}
+
 	return kvm;
  out_close:
 	close(fd);
@@ -1298,8 +1323,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 	new->flags = entry->flags;
 	new->u = entry->u;
 
-	if (entry->gsi > kvm->max_used_gsi)
-		kvm->max_used_gsi = entry->gsi;
 	return 0;
 #else
 	return -ENOSYS;
@@ -1404,19 +1427,42 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+static inline void set_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] |= 1U << (bit % 32);
+}
+
+static inline void clear_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] &= ~(1U << (bit % 32));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+	int i, bit, gsi;
+	uint32_t *buf = kvm->used_gsi_bitmap;
+
+	for (i = 0; i < kvm->max_gsi / 32; i++) {
+		bit = ffs(~buf[i]);
+		if (!bit)
+			continue;
+
+		gsi = bit - 1 + i * 32;
+		set_bit(buf, gsi);
+		return gsi;
+	}
+
+	return -ENOSPC;
+}
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
-#ifdef KVM_CAP_IRQ_ROUTING
-	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-	    if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
-                return kvm->max_used_gsi + 1;
-            else
-                return -ENOSPC;
-        } else
-            return KVM_IOAPIC_NUM_PINS;
-#else
-	return -ENOSYS;
-#endif
+	return kvm_find_free_gsi(kvm);
+}
+ 
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
+{
+	clear_bit(kvm->used_gsi_bitmap, gsi);
 }
 
 #ifdef KVM_CAP_DEVICE_MSIX
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 4821a1e..845bb8a 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Free used GSI number
+ *
+ * Free used GSI number acquired from kvm_get_irq_route_gsi()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI number to free
+ */
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
 			   struct kvm_assigned_msix_nr *msix_nr);


^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCH v5] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 15:13       ` [PATCH v5] " Alex Williamson
@ 2009-05-13 16:05         ` Michael S. Tsirkin
  2009-05-13 17:13           ` Alex Williamson
  2009-05-17 20:51           ` Avi Kivity
  2009-05-13 17:28         ` [PATCH v6] " Alex Williamson
  1 sibling, 2 replies; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13 16:05 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, avi

On Wed, May 13, 2009 at 09:13:38AM -0600, Alex Williamson wrote:
> @@ -323,6 +326,28 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
>  	kvm->no_irqchip_creation = 0;
>  	kvm->no_pit_creation = 0;
>  
> +	gsi_count = kvm_get_gsi_count(kvm);
> +	if (gsi_count > 0) {
> +		int gsi_bytes, i;
> +
> +		/* Round up so we can search ints using ffs */
> +		gsi_bytes = ((gsi_count + 31) / 32) * 4;

Let's take ALIGN macro from linux/kernel.h?

> +		kvm->used_gsi_bitmap = malloc(gsi_bytes);
> +		if (!kvm->used_gsi_bitmap)
> +			goto out_close;
> +		memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> +		kvm->max_gsi = gsi_bytes * 8;
> +
> +		/* Mark all the IOAPIC pin GSIs and any over-allocated
> +	 	* GSIs as already in use. */

Align '*'s please.

> +#ifdef KVM_IOAPIC_NUM_PINS

I think we should just export
#define KVM_IOAPIC_NUM_PINS 0
for ppc in kernel headers (or in libkvm),
and get rid of this ifdef completely.

Avi, agree?

> +		for (i = 0; i < min(KVM_IOAPIC_NUM_PINS, gsi_count); i++)
> +			set_bit(kvm->used_gsi_bitmap, i);
> +#endif
> +		for (i = gsi_count; i < kvm->max_gsi; i++)
> +			set_bit(kvm->used_gsi_bitmap, i);
> +	}
> +
>  	return kvm;
>   out_close:
>  	close(fd);

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v5] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 16:05         ` Michael S. Tsirkin
@ 2009-05-13 17:13           ` Alex Williamson
  2009-05-17 20:51           ` Avi Kivity
  1 sibling, 0 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 17:13 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, sheng.yang, avi

On Wed, 2009-05-13 at 19:05 +0300, Michael S. Tsirkin wrote:
> On Wed, May 13, 2009 at 09:13:38AM -0600, Alex Williamson wrote:
> > @@ -323,6 +326,28 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
> >  	kvm->no_irqchip_creation = 0;
> >  	kvm->no_pit_creation = 0;
> >  
> > +	gsi_count = kvm_get_gsi_count(kvm);
> > +	if (gsi_count > 0) {
> > +		int gsi_bytes, i;
> > +
> > +		/* Round up so we can search ints using ffs */
> > +		gsi_bytes = ((gsi_count + 31) / 32) * 4;
> 
> Let's take ALIGN macro from linux/kernel.h?

It's already defined in libkvm.c, I'll just move it up in the file.
There's also a BITMAP_SIZE macro by it that looks like it can be nuked.

> > +		kvm->used_gsi_bitmap = malloc(gsi_bytes);
> > +		if (!kvm->used_gsi_bitmap)
> > +			goto out_close;
> > +		memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> > +		kvm->max_gsi = gsi_bytes * 8;
> > +
> > +		/* Mark all the IOAPIC pin GSIs and any over-allocated
> > +	 	* GSIs as already in use. */
> 
> Align '*'s please.

Argh, fixed.

> > +#ifdef KVM_IOAPIC_NUM_PINS
> 
> I think we should just export
> #define KVM_IOAPIC_NUM_PINS 0
> for ppc in kernel headers (or in libkvm),
> and get rid of this ifdef completely.

Ok, I'll add an #ifndef and make it zero in libkvm.c.  It can be cleaned
out further from there.  Thanks,

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCH v6] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 15:13       ` [PATCH v5] " Alex Williamson
  2009-05-13 16:05         ` Michael S. Tsirkin
@ 2009-05-13 17:28         ` Alex Williamson
  2009-05-13 18:46           ` Michael S. Tsirkin
                             ` (2 more replies)
  1 sibling, 3 replies; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 17:28 UTC (permalink / raw)
  To: kvm, sheng.yang, mst; +Cc: alex.williamson

We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
---

 v2: Added mutex to protect gsi bitmap
 v3: Updated for comments from Michael Tsirkin
     No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"
 v4: Fix gsi_bytes calculation noted by Sheng Yang
 v5: Remove mutex per Avi
     Fix negative gsi_count path per Michael
     Remove KVM_CAP_IRQ_ROUTING per Michael, ppc should still be protected
         by the KVM_IOAPIC_NUM_PINS check
 v6: Make use of ALIGN macro, per Michael
     Define KVM_IOAPIC_NUM_PINS if not already, per Michael
     Fix comment indent, per Michael
     Remove unused BITMAP_SIZE macro

 hw/device-assignment.c  |    4 ++
 kvm/libkvm/kvm-common.h |    3 +-
 kvm/libkvm/libkvm.c     |   80 +++++++++++++++++++++++++++++++++++++----------
 kvm/libkvm/libkvm.h     |   10 ++++++
 4 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a7365c8..a6cc9b9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
     int i;
 
-    for (i = 0; i < dev->irq_entries_nr; i++)
+    for (i = 0; i < dev->irq_entries_nr; i++) {
         kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+        kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+    }
     free(dev->entry);
     dev->entry = NULL;
     dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..c95c591 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -67,7 +67,8 @@ struct kvm_context {
 	struct kvm_irq_routing *irq_routes;
 	int nr_allocated_irq_routes;
 #endif
-	int max_used_gsi;
+	void *used_gsi_bitmap;
+	int max_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..70857c7 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -61,10 +61,18 @@
 #define DPRINTF(fmt, args...) do {} while (0)
 #endif
 
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
+
+#ifndef KVM_IOAPIC_NUM_PINS
+#define KVM_IOAPIC_NUM_PINS 0
+#endif
 
 int kvm_abi = EXPECTED_KVM_API_VERSION;
 int kvm_page_size;
 
+static inline void set_bit(uint32_t *buf, unsigned int bit);
+
 struct slot_info {
 	unsigned long phys_addr;
 	unsigned long len;
@@ -285,7 +293,7 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 {
 	int fd;
 	kvm_context_t kvm;
-	int r;
+	int r, gsi_count;
 
 	fd = open("/dev/kvm", O_RDWR);
 	if (fd == -1) {
@@ -323,6 +331,26 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	kvm->no_irqchip_creation = 0;
 	kvm->no_pit_creation = 0;
 
+	gsi_count = kvm_get_gsi_count(kvm);
+	if (gsi_count > 0) {
+		int gsi_bits, i;
+
+		/* Round up so we can search ints using ffs */
+		gsi_bits = ALIGN(gsi_count, 32);
+		kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
+		if (!kvm->used_gsi_bitmap)
+			goto out_close;
+		memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
+		kvm->max_gsi = gsi_bits;
+
+		/* Mark all the IOAPIC pin GSIs and any over-allocated
+		 * GSIs as already in use. */
+		for (i = 0; i < MIN(KVM_IOAPIC_NUM_PINS, gsi_count); i++)
+			set_bit(kvm->used_gsi_bitmap, i);
+		for (i = gsi_count; i < gsi_bits; i++)
+			set_bit(kvm->used_gsi_bitmap, i);
+	}
+
 	return kvm;
  out_close:
 	close(fd);
@@ -626,9 +654,6 @@ int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
 	return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
 }
 
-#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
-#define BITMAP_SIZE(m) (ALIGN(((m)/PAGE_SIZE), sizeof(long) * 8) / 8)
-
 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
 			      unsigned long len, void *buf, void *opaque,
 			      int (*cb)(unsigned long start, unsigned long len,
@@ -1298,8 +1323,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 	new->flags = entry->flags;
 	new->u = entry->u;
 
-	if (entry->gsi > kvm->max_used_gsi)
-		kvm->max_used_gsi = entry->gsi;
 	return 0;
 #else
 	return -ENOSYS;
@@ -1404,19 +1427,42 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+static inline void set_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] |= 1U << (bit % 32);
+}
+
+static inline void clear_bit(uint32_t *buf, unsigned int bit)
+{
+	buf[bit / 32] &= ~(1U << (bit % 32));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+	int i, bit, gsi;
+	uint32_t *buf = kvm->used_gsi_bitmap;
+
+	for (i = 0; i < kvm->max_gsi / 32; i++) {
+		bit = ffs(~buf[i]);
+		if (!bit)
+			continue;
+
+		gsi = bit - 1 + i * 32;
+		set_bit(buf, gsi);
+		return gsi;
+	}
+
+	return -ENOSPC;
+}
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
-#ifdef KVM_CAP_IRQ_ROUTING
-	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-	    if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
-                return kvm->max_used_gsi + 1;
-            else
-                return -ENOSPC;
-        } else
-            return KVM_IOAPIC_NUM_PINS;
-#else
-	return -ENOSYS;
-#endif
+	return kvm_find_free_gsi(kvm);
+}
+ 
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
+{
+	clear_bit(kvm->used_gsi_bitmap, gsi);
 }
 
 #ifdef KVM_CAP_DEVICE_MSIX
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 4821a1e..845bb8a 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Free used GSI number
+ *
+ * Free used GSI number acquired from kvm_get_irq_route_gsi()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI number to free
+ */
+void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
 			   struct kvm_assigned_msix_nr *msix_nr);


^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCH v6] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 17:28         ` [PATCH v6] " Alex Williamson
@ 2009-05-13 18:46           ` Michael S. Tsirkin
  2009-05-17 20:54           ` Avi Kivity
  2009-05-19 20:48           ` [PATCH v7] " Alex Williamson
  2 siblings, 0 replies; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-13 18:46 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, avi

On Wed, May 13, 2009 at 11:28:16AM -0600, Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
> 
> Signed-off-by: Alex Williamson <alex.williamson@hp.com>

Acked-by: Michael S. Tsirkin <mst@redhat.com>

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 14:33                       ` Alex Williamson
@ 2009-05-13 23:07                         ` Alex Williamson
  2009-05-17 20:47                           ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-13 23:07 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm, sheng.yang

On Wed, 2009-05-13 at 08:33 -0600, Alex Williamson wrote:
> On Wed, 2009-05-13 at 08:15 -0600, Alex Williamson wrote:
> > On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
> > > Very surprising: I haven't seen any driver disable MSI expect on device
> > > destructor path. Is this a linux guest?
> > 
> > Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
> > upstream kernels and try to figure out why it's doing it.
> 
> Updating the guest to 2.6.29 seems to fix the interrupt toggling.  So
> it's either something in older kernels or something debian introduced,
> but that seems unlikely.

For the curious, this was fixed prior to 2.6.27-rc1 by this:

commit ce6fce4295ba727b36fdc73040e444bd1aae64cd
Author: Matthew Wilcox
Date:   Fri Jul 25 15:42:58 2008 -0600

    PCI MSI: Don't disable MSIs if the mask bit isn't supported
    
    David Vrabel has a device which generates an interrupt storm on the INTx
    pin if we disable MSI interrupts altogether.  Masking interrupts is only
    a performance optimisation, so we can ignore the request to mask the
    interrupt.

It looks like without the maskbit attribute on MSI, the default way to
mask an MSI interrupt was to toggle the MSI enable bit.  This was
introduced in 58e0543e8f355b32f0778a18858b255adb7402ae, so it's lifespan
was probably 2.6.21 - 2.6.26.

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 23:07                         ` Alex Williamson
@ 2009-05-17 20:47                           ` Avi Kivity
  2009-05-18 11:12                             ` Michael S. Tsirkin
  0 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-17 20:47 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Michael S. Tsirkin, kvm, sheng.yang

Alex Williamson wrote:
> On Wed, 2009-05-13 at 08:33 -0600, Alex Williamson wrote:
>   
>> On Wed, 2009-05-13 at 08:15 -0600, Alex Williamson wrote:
>>     
>>> On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
>>>       
>>>> Very surprising: I haven't seen any driver disable MSI expect on device
>>>> destructor path. Is this a linux guest?
>>>>         
>>> Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
>>> upstream kernels and try to figure out why it's doing it.
>>>       
>> Updating the guest to 2.6.29 seems to fix the interrupt toggling.  So
>> it's either something in older kernels or something debian introduced,
>> but that seems unlikely.
>>     
>
> For the curious, this was fixed prior to 2.6.27-rc1 by this:
>
> commit ce6fce4295ba727b36fdc73040e444bd1aae64cd
> Author: Matthew Wilcox
> Date:   Fri Jul 25 15:42:58 2008 -0600
>
>     PCI MSI: Don't disable MSIs if the mask bit isn't supported
>     
>     David Vrabel has a device which generates an interrupt storm on the INTx
>     pin if we disable MSI interrupts altogether.  Masking interrupts is only
>     a performance optimisation, so we can ignore the request to mask the
>     interrupt.
>
> It looks like without the maskbit attribute on MSI, the default way to
> mask an MSI interrupt was to toggle the MSI enable bit.  This was
> introduced in 58e0543e8f355b32f0778a18858b255adb7402ae, so it's lifespan
> was probably 2.6.21 - 2.6.26.
>
>   

On the other hand, if the host device supports this maskbit attribute, 
we might want to support it.  I'm not sure exactly how though.

If we trap msi entry writes, we're inviting the guest to exit every time 
it wants to disable interrupts.  If we don't, we're inviting spurious 
interrupts, which will cause unwanted exits and injections.

Maybe we ought to let the guest write to the msi tables without 
trapping, and in the injection logic do something like

    msi_entry = *msi_entry_ptr;
    mb();
    if (msi_entry != msi->last_msi_entry)
         msi_reconfigure();
    if (msi_enabled(msi_entry))
         insert_the_needle();

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 12:19       ` Alex Williamson
  2009-05-13 14:25         ` Michael S. Tsirkin
@ 2009-05-17 20:49         ` Avi Kivity
  1 sibling, 0 replies; 56+ messages in thread
From: Avi Kivity @ 2009-05-17 20:49 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Michael S. Tsirkin, kvm, sheng.yang

Alex Williamson wrote:
> On Wed, 2009-05-13 at 10:04 +0300, Michael S. Tsirkin wrote:
>   
>> On Tue, May 12, 2009 at 04:07:15PM -0600, Alex Williamson wrote:
>>     
>>> @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
>>>  	int fd;
>>>  	kvm_context_t kvm;
>>>  	int r;
>>> +#ifdef KVM_CAP_IRQ_ROUTING
>>>       
>> Let's kill all these ifdefs. Or at least, let's not add them.
>>     
>
> AFAICT, they're still used both for builds against older kernels and
> architectures that don't support it.  Hollis just added the one around
> kvm_get_irq_route_gsi() 10 days ago to fix ppc build.  Has it since been
> deprecated?  Thanks,
>   

They are somewhat documentary.  They used to be needed since we 
supported building against any kernel header, but aren't strictly needed 
now.  Let's keep them until we have Documentation/kvm/extensions.txt.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v5] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 16:05         ` Michael S. Tsirkin
  2009-05-13 17:13           ` Alex Williamson
@ 2009-05-17 20:51           ` Avi Kivity
  1 sibling, 0 replies; 56+ messages in thread
From: Avi Kivity @ 2009-05-17 20:51 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, kvm, sheng.yang

Michael S. Tsirkin wrote:
>
>> +#ifdef KVM_IOAPIC_NUM_PINS
>>     
>
> I think we should just export
> #define KVM_IOAPIC_NUM_PINS 0
> for ppc in kernel headers (or in libkvm),
> and get rid of this ifdef completely.
>
> Avi, agree?
>
>   

ppc doesn't have an ioapic, so it shouldn't have this define.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v6] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 17:28         ` [PATCH v6] " Alex Williamson
  2009-05-13 18:46           ` Michael S. Tsirkin
@ 2009-05-17 20:54           ` Avi Kivity
  2009-05-18 22:32             ` Alex Williamson
  2009-05-19 20:48           ` [PATCH v7] " Alex Williamson
  2 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-17 20:54 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, mst

Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
>
>  v6: Make use of ALIGN macro, per Michael
>      Define KVM_IOAPIC_NUM_PINS if not already, per Michael
>   

Due to me being slow I nacked this after you prepared the new patch.  Sorry.

We could define have platform_gsis passed from qemu to tell us how many 
GSIs to reserve (let's pretend libkvm isn't on death row for a moment).


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-17 20:47                           ` Avi Kivity
@ 2009-05-18 11:12                             ` Michael S. Tsirkin
  2009-05-18 11:36                               ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-18 11:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Alex Williamson, kvm, sheng.yang

On Sun, May 17, 2009 at 11:47:53PM +0300, Avi Kivity wrote:
> Alex Williamson wrote:
>> On Wed, 2009-05-13 at 08:33 -0600, Alex Williamson wrote:
>>   
>>> On Wed, 2009-05-13 at 08:15 -0600, Alex Williamson wrote:
>>>     
>>>> On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
>>>>       
>>>>> Very surprising: I haven't seen any driver disable MSI expect on device
>>>>> destructor path. Is this a linux guest?
>>>>>         
>>>> Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
>>>> upstream kernels and try to figure out why it's doing it.
>>>>       
>>> Updating the guest to 2.6.29 seems to fix the interrupt toggling.  So
>>> it's either something in older kernels or something debian introduced,
>>> but that seems unlikely.
>>>     
>>
>> For the curious, this was fixed prior to 2.6.27-rc1 by this:
>>
>> commit ce6fce4295ba727b36fdc73040e444bd1aae64cd
>> Author: Matthew Wilcox
>> Date:   Fri Jul 25 15:42:58 2008 -0600
>>
>>     PCI MSI: Don't disable MSIs if the mask bit isn't supported
>>         David Vrabel has a device which generates an interrupt storm on 
>> the INTx
>>     pin if we disable MSI interrupts altogether.  Masking interrupts is only
>>     a performance optimisation, so we can ignore the request to mask the
>>     interrupt.
>>
>> It looks like without the maskbit attribute on MSI, the default way to
>> mask an MSI interrupt was to toggle the MSI enable bit.  This was
>> introduced in 58e0543e8f355b32f0778a18858b255adb7402ae, so it's lifespan
>> was probably 2.6.21 - 2.6.26.
>>
>>   
>
> On the other hand, if the host device supports this maskbit attribute,  
> we might want to support it.  I'm not sure exactly how though.
>
> If we trap msi entry writes, we're inviting the guest to exit every time  
> it wants to disable interrupts.  If we don't, we're inviting spurious  
> interrupts, which will cause unwanted exits and injections.

Avi, I think you are mixing up MSI and MSI-X. MSI does not have any tables:
all changes go through configuration writes, which AFAIK we always trap.
Isn't that right?

On the other hand, in MSI-X mask bit is mandatory, not optional
so we'll have to support it for assigned devices at some point.

If we are worried about speed of masking/unmasking MSI-X interrupts for
assigned devices (older kernels used to mask them, recent kernels leave
this to drivers) we will probably need to have MSI-X support in the
kernel, and have kernel examine the mask bit before injecting the
interrupt, just like real devices do.

> Maybe we ought to let the guest write to the msi tables without  
> trapping, and in the injection logic do something like
>
>    msi_entry = *msi_entry_ptr;
>    mb();
>    if (msi_entry != msi->last_msi_entry)
>         msi_reconfigure();
>    if (msi_enabled(msi_entry))
>         insert_the_needle();

I don't really understand why do you need the reconfigure
and tracking last msi entry here.

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 11:12                             ` Michael S. Tsirkin
@ 2009-05-18 11:36                               ` Avi Kivity
  2009-05-18 12:19                                 ` Michael S. Tsirkin
  0 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-18 11:36 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, kvm, sheng.yang

Michael S. Tsirkin wrote:
> On Sun, May 17, 2009 at 11:47:53PM +0300, Avi Kivity wrote:
>   
>> Alex Williamson wrote:
>>     
>>> On Wed, 2009-05-13 at 08:33 -0600, Alex Williamson wrote:
>>>   
>>>       
>>>> On Wed, 2009-05-13 at 08:15 -0600, Alex Williamson wrote:
>>>>     
>>>>         
>>>>> On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
>>>>>       
>>>>>           
>>>>>> Very surprising: I haven't seen any driver disable MSI expect on device
>>>>>> destructor path. Is this a linux guest?
>>>>>>         
>>>>>>             
>>>>> Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
>>>>> upstream kernels and try to figure out why it's doing it.
>>>>>       
>>>>>           
>>>> Updating the guest to 2.6.29 seems to fix the interrupt toggling.  So
>>>> it's either something in older kernels or something debian introduced,
>>>> but that seems unlikely.
>>>>     
>>>>         
>>> For the curious, this was fixed prior to 2.6.27-rc1 by this:
>>>
>>> commit ce6fce4295ba727b36fdc73040e444bd1aae64cd
>>> Author: Matthew Wilcox
>>> Date:   Fri Jul 25 15:42:58 2008 -0600
>>>
>>>     PCI MSI: Don't disable MSIs if the mask bit isn't supported
>>>         David Vrabel has a device which generates an interrupt storm on 
>>> the INTx
>>>     pin if we disable MSI interrupts altogether.  Masking interrupts is only
>>>     a performance optimisation, so we can ignore the request to mask the
>>>     interrupt.
>>>
>>> It looks like without the maskbit attribute on MSI, the default way to
>>> mask an MSI interrupt was to toggle the MSI enable bit.  This was
>>> introduced in 58e0543e8f355b32f0778a18858b255adb7402ae, so it's lifespan
>>> was probably 2.6.21 - 2.6.26.
>>>
>>>   
>>>       
>> On the other hand, if the host device supports this maskbit attribute,  
>> we might want to support it.  I'm not sure exactly how though.
>>
>> If we trap msi entry writes, we're inviting the guest to exit every time  
>> it wants to disable interrupts.  If we don't, we're inviting spurious  
>> interrupts, which will cause unwanted exits and injections.
>>     
>
> Avi, I think you are mixing up MSI and MSI-X. MSI does not have any tables:
> all changes go through configuration writes, which AFAIK we always trap.
> Isn't that right?
>   

Right.

> On the other hand, in MSI-X mask bit is mandatory, not optional
> so we'll have to support it for assigned devices at some point.
>
> If we are worried about speed of masking/unmasking MSI-X interrupts for
> assigned devices (older kernels used to mask them, recent kernels leave
> this to drivers) we will probably need to have MSI-X support in the
> kernel, and have kernel examine the mask bit before injecting the
> interrupt, just like real devices do.
>   

Yes.  Let's actually quantify this though.  Several times per second 
doesn't qualify.

>> Maybe we ought to let the guest write to the msi tables without  
>> trapping, and in the injection logic do something like
>>
>>    msi_entry = *msi_entry_ptr;
>>    mb();
>>    if (msi_entry != msi->last_msi_entry)
>>         msi_reconfigure();
>>    if (msi_enabled(msi_entry))
>>         insert_the_needle();
>>     
>
> I don't really understand why do you need the reconfigure
> and tracking last msi entry here.
>   

The msi entry can change the vector and delivery mode, no?  This way we 
can cache some stuff instead of decoding it each time.  For example, if 
the entry points at a specific vcpu, we can cache the vcpu pointer, 
instead of searching for the apic ID.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 11:36                               ` Avi Kivity
@ 2009-05-18 12:19                                 ` Michael S. Tsirkin
  2009-05-18 12:33                                   ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-18 12:19 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Alex Williamson, kvm, sheng.yang

On Mon, May 18, 2009 at 02:36:41PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>> On Sun, May 17, 2009 at 11:47:53PM +0300, Avi Kivity wrote:
>>   
>>> Alex Williamson wrote:
>>>     
>>>> On Wed, 2009-05-13 at 08:33 -0600, Alex Williamson wrote:
>>>>         
>>>>> On Wed, 2009-05-13 at 08:15 -0600, Alex Williamson wrote:
>>>>>             
>>>>>> On Wed, 2009-05-13 at 16:55 +0300, Michael S. Tsirkin wrote:
>>>>>>                 
>>>>>>> Very surprising: I haven't seen any driver disable MSI expect on device
>>>>>>> destructor path. Is this a linux guest?
>>>>>>>                     
>>>>>> Yes, Debian 2.6.26 kernel.  I'll check it it behaves the same on newer
>>>>>> upstream kernels and try to figure out why it's doing it.
>>>>>>                 
>>>>> Updating the guest to 2.6.29 seems to fix the interrupt toggling.  So
>>>>> it's either something in older kernels or something debian introduced,
>>>>> but that seems unlikely.
>>>>>             
>>>> For the curious, this was fixed prior to 2.6.27-rc1 by this:
>>>>
>>>> commit ce6fce4295ba727b36fdc73040e444bd1aae64cd
>>>> Author: Matthew Wilcox
>>>> Date:   Fri Jul 25 15:42:58 2008 -0600
>>>>
>>>>     PCI MSI: Don't disable MSIs if the mask bit isn't supported
>>>>         David Vrabel has a device which generates an interrupt 
>>>> storm on the INTx
>>>>     pin if we disable MSI interrupts altogether.  Masking interrupts is only
>>>>     a performance optimisation, so we can ignore the request to mask the
>>>>     interrupt.
>>>>
>>>> It looks like without the maskbit attribute on MSI, the default way to
>>>> mask an MSI interrupt was to toggle the MSI enable bit.  This was
>>>> introduced in 58e0543e8f355b32f0778a18858b255adb7402ae, so it's lifespan
>>>> was probably 2.6.21 - 2.6.26.
>>>>
>>>>         
>>> On the other hand, if the host device supports this maskbit 
>>> attribute,  we might want to support it.  I'm not sure exactly how 
>>> though.
>>>
>>> If we trap msi entry writes, we're inviting the guest to exit every 
>>> time  it wants to disable interrupts.  If we don't, we're inviting 
>>> spurious  interrupts, which will cause unwanted exits and injections.
>>>     
>>
>> Avi, I think you are mixing up MSI and MSI-X. MSI does not have any tables:
>> all changes go through configuration writes, which AFAIK we always trap.
>> Isn't that right?
>>   
>
> Right.
>
>> On the other hand, in MSI-X mask bit is mandatory, not optional
>> so we'll have to support it for assigned devices at some point.
>>
>> If we are worried about speed of masking/unmasking MSI-X interrupts for
>> assigned devices (older kernels used to mask them, recent kernels leave
>> this to drivers) we will probably need to have MSI-X support in the
>> kernel, and have kernel examine the mask bit before injecting the
>> interrupt, just like real devices do.
>>   
>
> Yes.

Actually, if we do that, we'll need to address a race where a driver
has updated the mask bit in the window after we tested it
and before we inject the interrupt. Not sure how to do this.

> Let's actually quantify this though.  Several times per second  
> doesn't qualify.

Oh, I didn't actually imply that we need to optimize this path.
You seemed worried about the speed of masking the interrupt,
and I commented that to optimize it we'll have to move it
to kernel.

>>> Maybe we ought to let the guest write to the msi tables without   
>>> trapping, and in the injection logic do something like
>>>
>>>    msi_entry = *msi_entry_ptr;
>>>    mb();
>>>    if (msi_entry != msi->last_msi_entry)
>>>         msi_reconfigure();
>>>    if (msi_enabled(msi_entry))
>>>         insert_the_needle();
>>>     
>>
>> I don't really understand why do you need the reconfigure
>> and tracking last msi entry here.
>>   
>
> The msi entry can change the vector and delivery mode, no?  This way we  
> can cache some stuff instead of decoding it each time.  For example, if  
> the entry points at a specific vcpu, we can cache the vcpu pointer,  
> instead of searching for the apic ID.

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 12:19                                 ` Michael S. Tsirkin
@ 2009-05-18 12:33                                   ` Avi Kivity
  2009-05-18 13:45                                     ` Michael S. Tsirkin
  0 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-18 12:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, kvm, sheng.yang

Michael S. Tsirkin wrote:
>>
>>> On the other hand, in MSI-X mask bit is mandatory, not optional
>>> so we'll have to support it for assigned devices at some point.
>>>
>>> If we are worried about speed of masking/unmasking MSI-X interrupts for
>>> assigned devices (older kernels used to mask them, recent kernels leave
>>> this to drivers) we will probably need to have MSI-X support in the
>>> kernel, and have kernel examine the mask bit before injecting the
>>> interrupt, just like real devices do.
>>>   
>>>       
>> Yes.
>>     
>
> Actually, if we do that, we'll need to address a race where a driver
> has updated the mask bit in the window after we tested it
> and before we inject the interrupt. Not sure how to do this.
>   

The driver can't tell if the interrupt came first, so it's a valid race 
(real hardware has the same race).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 12:33                                   ` Avi Kivity
@ 2009-05-18 13:45                                     ` Michael S. Tsirkin
  2009-05-18 13:55                                       ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-18 13:45 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Alex Williamson, kvm, sheng.yang

On Mon, May 18, 2009 at 03:33:20PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>>>
>>>> On the other hand, in MSI-X mask bit is mandatory, not optional
>>>> so we'll have to support it for assigned devices at some point.
>>>>
>>>> If we are worried about speed of masking/unmasking MSI-X interrupts for
>>>> assigned devices (older kernels used to mask them, recent kernels leave
>>>> this to drivers) we will probably need to have MSI-X support in the
>>>> kernel, and have kernel examine the mask bit before injecting the
>>>> interrupt, just like real devices do.
>>>>         
>>> Yes.
>>>     
>>
>> Actually, if we do that, we'll need to address a race where a driver
>> has updated the mask bit in the window after we tested it
>> and before we inject the interrupt. Not sure how to do this.
>>   
>
> The driver can't tell if the interrupt came first, so it's a valid race  
> (real hardware has the same race).

The driver for real device can do a read followed by sync_irq to flush
out interrupts.

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 13:45                                     ` Michael S. Tsirkin
@ 2009-05-18 13:55                                       ` Avi Kivity
  2009-05-18 14:40                                         ` Michael S. Tsirkin
  0 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-18 13:55 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, kvm, sheng.yang

Michael S. Tsirkin wrote:
> On Mon, May 18, 2009 at 03:33:20PM +0300, Avi Kivity wrote:
>   
>> Michael S. Tsirkin wrote:
>>     
>>>>> On the other hand, in MSI-X mask bit is mandatory, not optional
>>>>> so we'll have to support it for assigned devices at some point.
>>>>>
>>>>> If we are worried about speed of masking/unmasking MSI-X interrupts for
>>>>> assigned devices (older kernels used to mask them, recent kernels leave
>>>>> this to drivers) we will probably need to have MSI-X support in the
>>>>> kernel, and have kernel examine the mask bit before injecting the
>>>>> interrupt, just like real devices do.
>>>>>         
>>>>>           
>>>> Yes.
>>>>     
>>>>         
>>> Actually, if we do that, we'll need to address a race where a driver
>>> has updated the mask bit in the window after we tested it
>>> and before we inject the interrupt. Not sure how to do this.
>>>   
>>>       
>> The driver can't tell if the interrupt came first, so it's a valid race  
>> (real hardware has the same race).
>>     
>
> The driver for real device can do a read followed by sync_irq to flush
> out interrupts.
>   

If it generates the interrupt after masking it in the msi-x entry, we'll 
see it.  If it generates the interrupt before masking it, it may or may 
not receive the interrupt, even on real hardware.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 13:55                                       ` Avi Kivity
@ 2009-05-18 14:40                                         ` Michael S. Tsirkin
  2009-05-18 14:46                                           ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-18 14:40 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Alex Williamson, kvm, sheng.yang

On Mon, May 18, 2009 at 04:55:30PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>> On Mon, May 18, 2009 at 03:33:20PM +0300, Avi Kivity wrote:
>>   
>>> Michael S. Tsirkin wrote:
>>>     
>>>>>> On the other hand, in MSI-X mask bit is mandatory, not optional
>>>>>> so we'll have to support it for assigned devices at some point.
>>>>>>
>>>>>> If we are worried about speed of masking/unmasking MSI-X interrupts for
>>>>>> assigned devices (older kernels used to mask them, recent kernels leave
>>>>>> this to drivers) we will probably need to have MSI-X support in the
>>>>>> kernel, and have kernel examine the mask bit before injecting the
>>>>>> interrupt, just like real devices do.
>>>>>>                   
>>>>> Yes.
>>>>>             
>>>> Actually, if we do that, we'll need to address a race where a driver
>>>> has updated the mask bit in the window after we tested it
>>>> and before we inject the interrupt. Not sure how to do this.
>>>>         
>>> The driver can't tell if the interrupt came first, so it's a valid 
>>> race  (real hardware has the same race).
>>>     
>>
>> The driver for real device can do a read followed by sync_irq to flush
>> out interrupts.
>>   
>
> If it generates the interrupt after masking it in the msi-x entry, we'll  
> see it.  If it generates the interrupt before masking it, it may or may  
> not receive the interrupt, even on real hardware.

Yes but in the later case, real hardware must re-send the pending
interrupt after it is unmasked (that's the spec). We would just lose it.

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 14:40                                         ` Michael S. Tsirkin
@ 2009-05-18 14:46                                           ` Avi Kivity
  2009-05-18 15:01                                             ` Michael S. Tsirkin
  0 siblings, 1 reply; 56+ messages in thread
From: Avi Kivity @ 2009-05-18 14:46 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, kvm, sheng.yang

Michael S. Tsirkin wrote:
>> If it generates the interrupt after masking it in the msi-x entry, we'll  
>> see it.  If it generates the interrupt before masking it, it may or may  
>> not receive the interrupt, even on real hardware.
>>     
>
> Yes but in the later case, real hardware must re-send the pending
> interrupt after it is unmasked (that's the spec). We would just lose it.
>   

That's a different matter.  We need to buffer the interrupt pending bit, 
and a way for userspace to either query that buffer or have a 
conditional injection (inject_if_pending).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 14:46                                           ` Avi Kivity
@ 2009-05-18 15:01                                             ` Michael S. Tsirkin
  2009-05-18 15:08                                               ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Michael S. Tsirkin @ 2009-05-18 15:01 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Alex Williamson, kvm, sheng.yang

On Mon, May 18, 2009 at 05:46:09PM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>>> If it generates the interrupt after masking it in the msi-x entry, 
>>> we'll  see it.  If it generates the interrupt before masking it, it 
>>> may or may  not receive the interrupt, even on real hardware.
>>>     
>>
>> Yes but in the later case, real hardware must re-send the pending
>> interrupt after it is unmasked (that's the spec). We would just lose it.
>>   
>
> That's a different matter.  We need to buffer the interrupt pending bit,  
> and a way for userspace to either query that buffer or have a  
> conditional injection (inject_if_pending).

Here's the race as I see it: we discussed the possibility
of making kernel and user share and actual memory page,
and using that for MSI-X tables.

	host kernel want to send msi x message
	host kernel test mask bit: unmasked
	guest sets mask bit
	guest does read to flash msi writes
	guest does sync irq and makes sure there are no
                           outstanging interrupts
	---> at this stage guest expects not to get interrupts
	guest starts editing msix entry

	host kernel never saw mask so it sends message to the old address
	       or even a corrupted address which the guest is in
               the middle of editing
	bad things happen

This race is not easy to solve, except by catching writes to msix table,
and syncronising them with interrupt delivery.

-- 
MST

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 15:01                                             ` Michael S. Tsirkin
@ 2009-05-18 15:08                                               ` Avi Kivity
  0 siblings, 0 replies; 56+ messages in thread
From: Avi Kivity @ 2009-05-18 15:08 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, kvm, sheng.yang

Michael S. Tsirkin wrote:
> Here's the race as I see it: we discussed the possibility
> of making kernel and user share and actual memory page,
> and using that for MSI-X tables.
>
> 	host kernel want to send msi x message
> 	host kernel test mask bit: unmasked
> 	guest sets mask bit
> 	guest does read to flash msi writes
> 	guest does sync irq and makes sure there are no
>                            outstanging interrupts
> 	---> at this stage guest expects not to get interrupts
> 	guest starts editing msix entry
>
> 	host kernel never saw mask so it sends message to the old address
> 	       or even a corrupted address which the guest is in
>                the middle of editing
> 	bad things happen
>
> This race is not easy to solve, except by catching writes to msix table,
> and syncronising them with interrupt delivery.
>   

You're right of course.  In any case this is premature, we'll have to 
see if this happens with any frequency.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v6] kvm: Use a bitmap for tracking used GSIs
  2009-05-17 20:54           ` Avi Kivity
@ 2009-05-18 22:32             ` Alex Williamson
  2009-05-19  8:01               ` Avi Kivity
  0 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-18 22:32 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, sheng.yang, mst

On Sun, 2009-05-17 at 23:54 +0300, Avi Kivity wrote:
> Alex Williamson wrote:
> > We're currently using a counter to track the most recent GSI we've
> > handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> > assignment with a driver that regularly toggles the MSI enable bit.
> > This can mean only a few minutes of usable run time.  Instead, track
> > used GSIs in a bitmap.
> >
> >  v6: Make use of ALIGN macro, per Michael
> >      Define KVM_IOAPIC_NUM_PINS if not already, per Michael
> >   
> 
> Due to me being slow I nacked this after you prepared the new patch.  Sorry.
> 
> We could define have platform_gsis passed from qemu to tell us how many 
> GSIs to reserve (let's pretend libkvm isn't on death row for a moment).

Perhaps we should update the bitmap on entry points that everyone uses
so we don't have to worry about preallocating.  We could set the bitmap
in kvm_add_routing_entry() and clear it in kvm_del_routing_entry().
This would mean that kvm_del_routing_entry() implicitly gives up a GSI
obtained via kvm_get_irq_route_gsi(), which seems to be the assumption
already.

That would eliminate any need for proliferating KVM_CAP_IRQ_ROUTING
ifdefs or doing anything based on KVM_IOAPIC_NUM_PINS, but should I keep
the KVM_CAP_IRQ_ROUTING around the new code for documentation purposes?
Thanks,

Alex


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH v6] kvm: Use a bitmap for tracking used GSIs
  2009-05-18 22:32             ` Alex Williamson
@ 2009-05-19  8:01               ` Avi Kivity
  0 siblings, 0 replies; 56+ messages in thread
From: Avi Kivity @ 2009-05-19  8:01 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, mst

Alex Williamson wrote:
> Perhaps we should update the bitmap on entry points that everyone uses
> so we don't have to worry about preallocating.  We could set the bitmap
> in kvm_add_routing_entry() and clear it in kvm_del_routing_entry().
> This would mean that kvm_del_routing_entry() implicitly gives up a GSI
> obtained via kvm_get_irq_route_gsi(), which seems to be the assumption
> already.
>
>   

Much better.

> That would eliminate any need for proliferating KVM_CAP_IRQ_ROUTING
> ifdefs or doing anything based on KVM_IOAPIC_NUM_PINS, but should I keep
> the KVM_CAP_IRQ_ROUTING around the new code for documentation purposes

Only around code which directly uses the routing facilities (i.e. only 
in the libkvm wrappers).  Code in qemu should only do runtime detection.

I really should write Documentation/kvm/extensions.txt.  And ioctls.txt, 
and intro.txt...

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCH v7] kvm: Use a bitmap for tracking used GSIs
  2009-05-13 17:28         ` [PATCH v6] " Alex Williamson
  2009-05-13 18:46           ` Michael S. Tsirkin
  2009-05-17 20:54           ` Avi Kivity
@ 2009-05-19 20:48           ` Alex Williamson
  2009-05-20 11:55             ` Avi Kivity
  2 siblings, 1 reply; 56+ messages in thread
From: Alex Williamson @ 2009-05-19 20:48 UTC (permalink / raw)
  To: kvm, sheng.yang, mst, avi; +Cc: alex.williamson

We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit
(such as Linux kernels 2.6.21-26).  This can mean only a few minutes
of usable run time.  Instead, track used GSIs in a bitmap.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
---

 v2: Added mutex to protect gsi bitmap
 v3: Updated for comments from Michael Tsirkin
     No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"
 v4: Fix gsi_bytes calculation noted by Sheng Yang
 v5: Remove mutex per Avi
     Fix negative gsi_count path per Michael
     Remove KVM_CAP_IRQ_ROUTING per Michael, ppc should still be protected
         by the KVM_IOAPIC_NUM_PINS check
 v6: Make use of ALIGN macro, per Michael
     Define KVM_IOAPIC_NUM_PINS if not already, per Michael
     Fix comment indent, per Michael
     Remove unused BITMAP_SIZE macro
 v7: Don't define KVM_IOAPIC_NUM_PINS, mark bitmap in common
         paths so we can stay blissfully ignorant of ioapics

 kvm/libkvm/kvm-common.h |    3 +-
 kvm/libkvm/libkvm.c     |   91 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..c95c591 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -67,7 +67,8 @@ struct kvm_context {
 	struct kvm_irq_routing *irq_routes;
 	int nr_allocated_irq_routes;
 #endif
-	int max_used_gsi;
+	void *used_gsi_bitmap;
+	int max_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..c5d6a7f 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -61,10 +61,32 @@
 #define DPRINTF(fmt, args...) do {} while (0)
 #endif
 
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
 
 int kvm_abi = EXPECTED_KVM_API_VERSION;
 int kvm_page_size;
 
+static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
+{
+	uint32_t *bitmap = kvm->used_gsi_bitmap;
+
+	if (gsi < kvm->max_gsi)
+		bitmap[gsi / 32] |= 1U << (gsi % 32);
+	else
+		DPRINTF("Invalid GSI %d\n");
+}
+
+static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
+{
+	uint32_t *bitmap = kvm->used_gsi_bitmap;
+
+	if (gsi < kvm->max_gsi)
+		bitmap[gsi / 32] &= ~(1U << (gsi % 32));
+	else
+		DPRINTF("Invalid GSI %d\n");
+}
+
 struct slot_info {
 	unsigned long phys_addr;
 	unsigned long len;
@@ -285,7 +307,7 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 {
 	int fd;
 	kvm_context_t kvm;
-	int r;
+	int r, gsi_count;
 
 	fd = open("/dev/kvm", O_RDWR);
 	if (fd == -1) {
@@ -323,6 +345,23 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
 	kvm->no_irqchip_creation = 0;
 	kvm->no_pit_creation = 0;
 
+	gsi_count = kvm_get_gsi_count(kvm);
+	if (gsi_count > 0) {
+		int gsi_bits, i;
+
+		/* Round up so we can search ints using ffs */
+		gsi_bits = ALIGN(gsi_count, 32);
+		kvm->used_gsi_bitmap = malloc(gsi_bits / 8);
+		if (!kvm->used_gsi_bitmap)
+			goto out_close;
+		memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8);
+		kvm->max_gsi = gsi_bits;
+
+		/* Mark any over-allocated bits as already in use */
+		for (i = gsi_count; i < gsi_bits; i++)
+			set_gsi(kvm, i);
+	}
+
 	return kvm;
  out_close:
 	close(fd);
@@ -626,9 +665,6 @@ int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf)
 	return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf);
 }
 
-#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
-#define BITMAP_SIZE(m) (ALIGN(((m)/PAGE_SIZE), sizeof(long) * 8) / 8)
-
 int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
 			      unsigned long len, void *buf, void *opaque,
 			      int (*cb)(unsigned long start, unsigned long len,
@@ -1298,8 +1334,8 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 	new->flags = entry->flags;
 	new->u = entry->u;
 
-	if (entry->gsi > kvm->max_used_gsi)
-		kvm->max_used_gsi = entry->gsi;
+	set_gsi(kvm, entry->gsi);
+
 	return 0;
 #else
 	return -ENOSYS;
@@ -1327,12 +1363,14 @@ int kvm_del_routing_entry(kvm_context_t kvm,
 {
 #ifdef KVM_CAP_IRQ_ROUTING
 	struct kvm_irq_routing_entry *e, *p;
-	int i, found = 0;
+	int i, gsi, found = 0;
+
+	gsi = entry->gsi;
 
 	for (i = 0; i < kvm->irq_routes->nr; ++i) {
 		e = &kvm->irq_routes->entries[i];
 		if (e->type == entry->type
-		    && e->gsi == entry->gsi) {
+		    && e->gsi == gsi) {
 			switch (e->type)
 			{
 			case KVM_IRQ_ROUTING_IRQCHIP: {
@@ -1363,8 +1401,19 @@ int kvm_del_routing_entry(kvm_context_t kvm,
 			default:
 				break;
 			}
-			if (found)
+			if (found) {
+				/* If there are no other users of this GSI
+				 * mark it available in the bitmap */
+				for (i = 0; i < kvm->irq_routes->nr; i++) {
+					e = &kvm->irq_routes->entries[i];
+					if (e->gsi == gsi)
+						break;
+				}
+				if (i == kvm->irq_routes->nr)
+					clear_gsi(kvm, gsi);
+
 				return 0;
+			}
 		}
 	}
 	return -ESRCH;
@@ -1406,17 +1455,19 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
-#ifdef KVM_CAP_IRQ_ROUTING
-	if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-	    if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
-                return kvm->max_used_gsi + 1;
-            else
-                return -ENOSPC;
-        } else
-            return KVM_IOAPIC_NUM_PINS;
-#else
-	return -ENOSYS;
-#endif
+	int i, bit;
+	uint32_t *buf = kvm->used_gsi_bitmap;
+
+	/* Return the lowest unused GSI in the bitmap */
+	for (i = 0; i < kvm->max_gsi / 32; i++) {
+		bit = ffs(~buf[i]);
+		if (!bit)
+			continue;
+
+		return bit - 1 + i * 32;
+	}
+
+	return -ENOSPC;
 }
 
 #ifdef KVM_CAP_DEVICE_MSIX


^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCH v7] kvm: Use a bitmap for tracking used GSIs
  2009-05-19 20:48           ` [PATCH v7] " Alex Williamson
@ 2009-05-20 11:55             ` Avi Kivity
  0 siblings, 0 replies; 56+ messages in thread
From: Avi Kivity @ 2009-05-20 11:55 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, sheng.yang, mst

Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit
> (such as Linux kernels 2.6.21-26).  This can mean only a few minutes
> of usable run time.  Instead, track used GSIs in a bitmap.
>
>   

Applied, thanks.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 56+ messages in thread

end of thread, other threads:[~2009-05-20 11:55 UTC | newest]

Thread overview: 56+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-07 22:22 [PATCH] kvm: Use a bitmap for tracking used GSIs Alex Williamson
2009-05-08 22:31 ` [PATCH v2] " Alex Williamson
2009-05-12 19:39   ` Michael S. Tsirkin
2009-05-12 21:56     ` Alex Williamson
2009-05-12 22:07   ` [PATCH v3] " Alex Williamson
2009-05-13  3:30     ` Yang, Sheng
2009-05-13  3:42       ` Alex Williamson
2009-05-13  4:10         ` Alex Williamson
2009-05-13  4:15           ` Yang, Sheng
2009-05-13  4:41     ` [PATCH v4] " Alex Williamson
2009-05-13  4:58       ` Yang, Sheng
2009-05-13  9:47       ` Avi Kivity
2009-05-13 12:28         ` Alex Williamson
2009-05-13 12:35           ` Avi Kivity
2009-05-13 12:55             ` Alex Williamson
2009-05-13 13:00               ` Avi Kivity
2009-05-13 13:11                 ` Alex Williamson
2009-05-13 13:55                   ` Michael S. Tsirkin
2009-05-13 14:15                     ` Alex Williamson
2009-05-13 14:30                       ` Michael S. Tsirkin
2009-05-13 14:33                       ` Alex Williamson
2009-05-13 23:07                         ` Alex Williamson
2009-05-17 20:47                           ` Avi Kivity
2009-05-18 11:12                             ` Michael S. Tsirkin
2009-05-18 11:36                               ` Avi Kivity
2009-05-18 12:19                                 ` Michael S. Tsirkin
2009-05-18 12:33                                   ` Avi Kivity
2009-05-18 13:45                                     ` Michael S. Tsirkin
2009-05-18 13:55                                       ` Avi Kivity
2009-05-18 14:40                                         ` Michael S. Tsirkin
2009-05-18 14:46                                           ` Avi Kivity
2009-05-18 15:01                                             ` Michael S. Tsirkin
2009-05-18 15:08                                               ` Avi Kivity
2009-05-13 14:32       ` Michael S. Tsirkin
2009-05-13 15:13       ` [PATCH v5] " Alex Williamson
2009-05-13 16:05         ` Michael S. Tsirkin
2009-05-13 17:13           ` Alex Williamson
2009-05-17 20:51           ` Avi Kivity
2009-05-13 17:28         ` [PATCH v6] " Alex Williamson
2009-05-13 18:46           ` Michael S. Tsirkin
2009-05-17 20:54           ` Avi Kivity
2009-05-18 22:32             ` Alex Williamson
2009-05-19  8:01               ` Avi Kivity
2009-05-19 20:48           ` [PATCH v7] " Alex Williamson
2009-05-20 11:55             ` Avi Kivity
2009-05-13  7:03     ` [PATCH v3] " Michael S. Tsirkin
2009-05-13 12:15       ` Alex Williamson
2009-05-13  7:04     ` Michael S. Tsirkin
2009-05-13 12:19       ` Alex Williamson
2009-05-13 14:25         ` Michael S. Tsirkin
2009-05-17 20:49         ` Avi Kivity
2009-05-11 12:00 ` [PATCH] " Yang, Sheng
2009-05-12 18:45   ` Alex Williamson
2009-05-12 19:06     ` Alex Williamson
2009-05-12 19:10       ` Michael S. Tsirkin
2009-05-12 19:23         ` Alex Williamson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.