All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
@ 2018-01-16 13:34 Gal Hammer
  2018-01-16 13:45 ` Paolo Bonzini
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Gal Hammer @ 2018-01-16 13:34 UTC (permalink / raw)
  To: kvm; +Cc: rkrcmar, pbonzini, Gal Hammer

The loading time of a VM is quite significant, with CPU usage
reaching 100%, when loading a VM whose virtio devices use a
large number of virt-queues (e.g. a virtio-serial device with
max_ports=511). Most of the time is spent re-sorting the
kvm_io_bus kvm_io_range array when a new eventfd is registered.

The patch replaces the existing method with an insert sort.

Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Uri Lublin <ulublin@redhat.com>
Signed-off-by: Gal Hammer <ghammer@redhat.com>
---
 virt/kvm/kvm_main.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 210bf82..1e467fc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3419,21 +3419,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
 	return kvm_io_bus_cmp(p1, p2);
 }
 
-static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
-			  gpa_t addr, int len)
-{
-	bus->range[bus->dev_count++] = (struct kvm_io_range) {
-		.addr = addr,
-		.len = len,
-		.dev = dev,
-	};
-
-	sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
-		kvm_io_bus_sort_cmp, NULL);
-
-	return 0;
-}
-
 static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
 			     gpa_t addr, int len)
 {
@@ -3574,7 +3559,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			    int len, struct kvm_io_device *dev)
 {
+	int i;
 	struct kvm_io_bus *new_bus, *bus;
+	struct kvm_io_range range;
 
 	bus = kvm_get_bus(kvm, bus_idx);
 	if (!bus)
@@ -3588,9 +3575,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
 	if (!new_bus)
 		return -ENOMEM;
-	memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
-	       sizeof(struct kvm_io_range)));
-	kvm_io_bus_insert_dev(new_bus, dev, addr, len);
+
+	range = (struct kvm_io_range) {
+		.addr = addr,
+		.len = len,
+		.dev = dev,
+	};
+
+	for (i = 0; i < bus->dev_count; i++)
+		if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
+			break;
+
+	memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+	new_bus->dev_count++;
+	new_bus->range[i] = range;
+	memcpy(new_bus->range + i + 1, bus->range + i,
+		(bus->dev_count - i) * sizeof(struct kvm_io_range));
 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
 	synchronize_srcu_expedited(&kvm->srcu);
 	kfree(bus);
-- 
2.7.5

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
  2018-01-16 13:34 [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function Gal Hammer
@ 2018-01-16 13:45 ` Paolo Bonzini
  2018-01-16 17:36 ` David Hildenbrand
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2018-01-16 13:45 UTC (permalink / raw)
  To: Gal Hammer, kvm; +Cc: rkrcmar

On 16/01/2018 14:34, Gal Hammer wrote:
> The loading time of a VM is quite significant with a CPU usage
> reaching 100% when loading a VM that its virtio devices use a
> large amount of virt-queues (e.g. a virtio-serial device with
> max_ports=511). Most of the time is spend in re-sorting the
> kvm_io_bus kvm_io_range array when a new eventfd is registered.
> 
> The patch replaces the existing method with an insert sort.
> 
> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Uri Lublin <ulublin@redhat.com>
> Signed-off-by: Gal Hammer <ghammer@redhat.com>
> ---
>  virt/kvm/kvm_main.c | 36 ++++++++++++++++++------------------
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 210bf82..1e467fc 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3419,21 +3419,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
>  	return kvm_io_bus_cmp(p1, p2);
>  }
>  
> -static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
> -			  gpa_t addr, int len)
> -{
> -	bus->range[bus->dev_count++] = (struct kvm_io_range) {
> -		.addr = addr,
> -		.len = len,
> -		.dev = dev,
> -	};
> -
> -	sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
> -		kvm_io_bus_sort_cmp, NULL);
> -
> -	return 0;
> -}
> -
>  static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
>  			     gpa_t addr, int len)
>  {
> @@ -3574,7 +3559,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
>  int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>  			    int len, struct kvm_io_device *dev)
>  {
> +	int i;
>  	struct kvm_io_bus *new_bus, *bus;
> +	struct kvm_io_range range;
>  
>  	bus = kvm_get_bus(kvm, bus_idx);
>  	if (!bus)
> @@ -3588,9 +3575,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>  			  sizeof(struct kvm_io_range)), GFP_KERNEL);
>  	if (!new_bus)
>  		return -ENOMEM;
> -	memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
> -	       sizeof(struct kvm_io_range)));
> -	kvm_io_bus_insert_dev(new_bus, dev, addr, len);
> +
> +	range = (struct kvm_io_range) {
> +		.addr = addr,
> +		.len = len,
> +		.dev = dev,
> +	};
> +
> +	for (i = 0; i < bus->dev_count; i++)
> +		if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
> +			break;
> +
> +	memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
> +	new_bus->dev_count++;
> +	new_bus->range[i] = range;
> +	memcpy(new_bus->range + i + 1, bus->range + i,
> +		(bus->dev_count - i) * sizeof(struct kvm_io_range));
>  	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
>  	synchronize_srcu_expedited(&kvm->srcu);
>  	kfree(bus);
> 

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
  2018-01-16 13:34 [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function Gal Hammer
  2018-01-16 13:45 ` Paolo Bonzini
@ 2018-01-16 17:36 ` David Hildenbrand
       [not found]   ` <CAA2ifQzRGjEeEPiQmsqF6ou-oqsv5Px=i9KyxdnUfu80Ae_O1w@mail.gmail.com>
  2018-01-25  7:48 ` Wanpeng Li
  2018-02-14 18:11 ` Paolo Bonzini
  3 siblings, 1 reply; 8+ messages in thread
From: David Hildenbrand @ 2018-01-16 17:36 UTC (permalink / raw)
  To: Gal Hammer, kvm; +Cc: rkrcmar, pbonzini

On 16.01.2018 14:34, Gal Hammer wrote:
> The loading time of a VM is quite significant with a CPU usage
> reaching 100% when loading a VM that its virtio devices use a
> large amount of virt-queues (e.g. a virtio-serial device with
> max_ports=511). Most of the time is spend in re-sorting the
> kvm_io_bus kvm_io_range array when a new eventfd is registered.
> 
> The patch replaces the existing method with an insert sort.
> 
> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Uri Lublin <ulublin@redhat.com>
> Signed-off-by: Gal Hammer <ghammer@redhat.com>
> ---
>  virt/kvm/kvm_main.c | 36 ++++++++++++++++++------------------
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 210bf82..1e467fc 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3419,21 +3419,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
>  	return kvm_io_bus_cmp(p1, p2);
>  }
>  
> -static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
> -			  gpa_t addr, int len)
> -{
> -	bus->range[bus->dev_count++] = (struct kvm_io_range) {
> -		.addr = addr,
> -		.len = len,
> -		.dev = dev,
> -	};
> -
> -	sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
> -		kvm_io_bus_sort_cmp, NULL);
> -
> -	return 0;
> -}
> -
>  static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
>  			     gpa_t addr, int len)
>  {
> @@ -3574,7 +3559,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
>  int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>  			    int len, struct kvm_io_device *dev)
>  {
> +	int i;
>  	struct kvm_io_bus *new_bus, *bus;
> +	struct kvm_io_range range;
>  
>  	bus = kvm_get_bus(kvm, bus_idx);
>  	if (!bus)
> @@ -3588,9 +3575,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>  			  sizeof(struct kvm_io_range)), GFP_KERNEL);
>  	if (!new_bus)
>  		return -ENOMEM;
> -	memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
> -	       sizeof(struct kvm_io_range)));
> -	kvm_io_bus_insert_dev(new_bus, dev, addr, len);
> +
> +	range = (struct kvm_io_range) {
> +		.addr = addr,
> +		.len = len,
> +		.dev = dev,
> +	};

initialize directly when defining it?

> +
> +	for (i = 0; i < bus->dev_count; i++)
> +		if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
> +			break;
> +
> +	memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
> +	new_bus->dev_count++;
> +	new_bus->range[i] = range;
> +	memcpy(new_bus->range + i + 1, bus->range + i,
> +		(bus->dev_count - i) * sizeof(struct kvm_io_range));

These offset/size/length calculations are just ugly. Wonder if that can
be written any nicer.

>  	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
>  	synchronize_srcu_expedited(&kvm->srcu);
>  	kfree(bus);
> 


-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
       [not found]   ` <CAA2ifQzRGjEeEPiQmsqF6ou-oqsv5Px=i9KyxdnUfu80Ae_O1w@mail.gmail.com>
@ 2018-01-17  9:11     ` David Hildenbrand
  0 siblings, 0 replies; 8+ messages in thread
From: David Hildenbrand @ 2018-01-17  9:11 UTC (permalink / raw)
  To: Gal Hammer; +Cc: kvm


>     initialize directly when defining it?
> 
> 
> Since the function might exit before range is initialized, I don't see
> a reason to do it on definition.

Cleaner and we don't optimize for error paths.

>  
> 
>     > +
>     > +     for (i = 0; i < bus->dev_count; i++)
>     > +             if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
>     > +                     break;
>     > +
>     > +     memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
>     > +     new_bus->dev_count++;
>     > +     new_bus->range[i] = range;
>     > +     memcpy(new_bus->range + i + 1, bus->range + i,
>     > +             (bus->dev_count - i) * sizeof(struct kvm_io_range));
> 
>     These offset/size/length calculations are just ugly. Wonder if that can
>     be written any nicer.
> 
> 
> ​I really don't know how to response to that. It might be ugly but it
> has a lovely personality?​
> 
> This is a standard and common pointer arithmetic, but feel free to
> propose another way to implement it.

Will think about it ... But yes, it has personality :)

-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
  2018-01-16 13:34 [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function Gal Hammer
  2018-01-16 13:45 ` Paolo Bonzini
  2018-01-16 17:36 ` David Hildenbrand
@ 2018-01-25  7:48 ` Wanpeng Li
  2018-01-25 14:53   ` Paolo Bonzini
  2018-02-14 18:11 ` Paolo Bonzini
  3 siblings, 1 reply; 8+ messages in thread
From: Wanpeng Li @ 2018-01-25  7:48 UTC (permalink / raw)
  To: Gal Hammer; +Cc: kvm, Radim Krcmar, Paolo Bonzini

2018-01-16 21:34 GMT+08:00 Gal Hammer <ghammer@redhat.com>:
> The loading time of a VM is quite significant with a CPU usage
> reaching 100% when loading a VM that its virtio devices use a
> large amount of virt-queues (e.g. a virtio-serial device with
> max_ports=511). Most of the time is spend in re-sorting the
> kvm_io_bus kvm_io_range array when a new eventfd is registered.
>
> The patch replaces the existing method with an insert sort.

Should we stop using bsearch when searching the io bus later?

Regards,
Wanpeng Li

>
> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Uri Lublin <ulublin@redhat.com>
> Signed-off-by: Gal Hammer <ghammer@redhat.com>
> ---
>  virt/kvm/kvm_main.c | 36 ++++++++++++++++++------------------
>  1 file changed, 18 insertions(+), 18 deletions(-)
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 210bf82..1e467fc 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3419,21 +3419,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
>         return kvm_io_bus_cmp(p1, p2);
>  }
>
> -static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
> -                         gpa_t addr, int len)
> -{
> -       bus->range[bus->dev_count++] = (struct kvm_io_range) {
> -               .addr = addr,
> -               .len = len,
> -               .dev = dev,
> -       };
> -
> -       sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
> -               kvm_io_bus_sort_cmp, NULL);
> -
> -       return 0;
> -}
> -
>  static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
>                              gpa_t addr, int len)
>  {
> @@ -3574,7 +3559,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
>  int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>                             int len, struct kvm_io_device *dev)
>  {
> +       int i;
>         struct kvm_io_bus *new_bus, *bus;
> +       struct kvm_io_range range;
>
>         bus = kvm_get_bus(kvm, bus_idx);
>         if (!bus)
> @@ -3588,9 +3575,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>                           sizeof(struct kvm_io_range)), GFP_KERNEL);
>         if (!new_bus)
>                 return -ENOMEM;
> -       memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
> -              sizeof(struct kvm_io_range)));
> -       kvm_io_bus_insert_dev(new_bus, dev, addr, len);
> +
> +       range = (struct kvm_io_range) {
> +               .addr = addr,
> +               .len = len,
> +               .dev = dev,
> +       };
> +
> +       for (i = 0; i < bus->dev_count; i++)
> +               if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
> +                       break;
> +
> +       memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
> +       new_bus->dev_count++;
> +       new_bus->range[i] = range;
> +       memcpy(new_bus->range + i + 1, bus->range + i,
> +               (bus->dev_count - i) * sizeof(struct kvm_io_range));
>         rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
>         synchronize_srcu_expedited(&kvm->srcu);
>         kfree(bus);
> --
> 2.7.5
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
  2018-01-25  7:48 ` Wanpeng Li
@ 2018-01-25 14:53   ` Paolo Bonzini
  2018-01-26  6:10     ` Wanpeng Li
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2018-01-25 14:53 UTC (permalink / raw)
  To: Wanpeng Li; +Cc: Gal Hammer, kvm, Radim Krcmar



----- Original Message -----
> From: "Wanpeng Li" <kernellwp@gmail.com>
> To: "Gal Hammer" <ghammer@redhat.com>
> Cc: "kvm" <kvm@vger.kernel.org>, "Radim Krcmar" <rkrcmar@redhat.com>, "Paolo Bonzini" <pbonzini@redhat.com>
> Sent: Thursday, January 25, 2018 8:48:14 AM
> Subject: Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
> 
> 2018-01-16 21:34 GMT+08:00 Gal Hammer <ghammer@redhat.com>:
> > The loading time of a VM is quite significant with a CPU usage
> > reaching 100% when loading a VM that its virtio devices use a
> > large amount of virt-queues (e.g. a virtio-serial device with
> > max_ports=511). Most of the time is spend in re-sorting the
> > kvm_io_bus kvm_io_range array when a new eventfd is registered.
> >
> > The patch replaces the existing method with an insert sort.
> 
> If we should stop to use bsearch when searching io bus later?

There is an important difference between the two cases.  In Gal's
case, sort() is always O(n log n) and so you get O(n^2 log n) vs
O(n^2) for insertion sort.  Also, heap sort is pretty expensive,
so there is a lot of overhead even before you take into account
indirect calls.

For bsearch(), switching to an open-coded binary search would keep
the same complexity O(log n), only saving the indirect calls.  It
may save a few clock cycles, but it's probably even more effective
to keep a simple one-element cache for the most recently used item,
and then fall back to bsearch().

Paolo

> 
> Regards,
> Wanpeng Li
> 
> >
> > Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
> > Reviewed-by: Uri Lublin <ulublin@redhat.com>
> > Signed-off-by: Gal Hammer <ghammer@redhat.com>
> > ---
> >  virt/kvm/kvm_main.c | 36 ++++++++++++++++++------------------
> >  1 file changed, 18 insertions(+), 18 deletions(-)
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 210bf82..1e467fc 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -3419,21 +3419,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const
> > void *p2)
> >         return kvm_io_bus_cmp(p1, p2);
> >  }
> >
> > -static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct
> > kvm_io_device *dev,
> > -                         gpa_t addr, int len)
> > -{
> > -       bus->range[bus->dev_count++] = (struct kvm_io_range) {
> > -               .addr = addr,
> > -               .len = len,
> > -               .dev = dev,
> > -       };
> > -
> > -       sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
> > -               kvm_io_bus_sort_cmp, NULL);
> > -
> > -       return 0;
> > -}
> > -
> >  static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
> >                              gpa_t addr, int len)
> >  {
> > @@ -3574,7 +3559,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum
> > kvm_bus bus_idx, gpa_t addr,
> >  int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t
> >  addr,
> >                             int len, struct kvm_io_device *dev)
> >  {
> > +       int i;
> >         struct kvm_io_bus *new_bus, *bus;
> > +       struct kvm_io_range range;
> >
> >         bus = kvm_get_bus(kvm, bus_idx);
> >         if (!bus)
> > @@ -3588,9 +3575,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum
> > kvm_bus bus_idx, gpa_t addr,
> >                           sizeof(struct kvm_io_range)), GFP_KERNEL);
> >         if (!new_bus)
> >                 return -ENOMEM;
> > -       memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
> > -              sizeof(struct kvm_io_range)));
> > -       kvm_io_bus_insert_dev(new_bus, dev, addr, len);
> > +
> > +       range = (struct kvm_io_range) {
> > +               .addr = addr,
> > +               .len = len,
> > +               .dev = dev,
> > +       };
> > +
> > +       for (i = 0; i < bus->dev_count; i++)
> > +               if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
> > +                       break;
> > +
> > +       memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct
> > kvm_io_range));
> > +       new_bus->dev_count++;
> > +       new_bus->range[i] = range;
> > +       memcpy(new_bus->range + i + 1, bus->range + i,
> > +               (bus->dev_count - i) * sizeof(struct kvm_io_range));
> >         rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
> >         synchronize_srcu_expedited(&kvm->srcu);
> >         kfree(bus);
> > --
> > 2.7.5
> >
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
  2018-01-25 14:53   ` Paolo Bonzini
@ 2018-01-26  6:10     ` Wanpeng Li
  0 siblings, 0 replies; 8+ messages in thread
From: Wanpeng Li @ 2018-01-26  6:10 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Gal Hammer, kvm, Radim Krcmar

2018-01-25 22:53 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>
>
> ----- Original Message -----
>> From: "Wanpeng Li" <kernellwp@gmail.com>
>> To: "Gal Hammer" <ghammer@redhat.com>
>> Cc: "kvm" <kvm@vger.kernel.org>, "Radim Krcmar" <rkrcmar@redhat.com>, "Paolo Bonzini" <pbonzini@redhat.com>
>> Sent: Thursday, January 25, 2018 8:48:14 AM
>> Subject: Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
>>
>> 2018-01-16 21:34 GMT+08:00 Gal Hammer <ghammer@redhat.com>:
>> > The loading time of a VM is quite significant with a CPU usage
>> > reaching 100% when loading a VM that its virtio devices use a
>> > large amount of virt-queues (e.g. a virtio-serial device with
>> > max_ports=511). Most of the time is spend in re-sorting the
>> > kvm_io_bus kvm_io_range array when a new eventfd is registered.
>> >
>> > The patch replaces the existing method with an insert sort.
>>
>> If we should stop to use bsearch when searching io bus later?
>
> There is an important difference between the two cases.  In Gal's
> case, sort() is always O(n log n) and so you get O(n^2 log n) vs
> O(n^2) for insertion sort.  Also, heap sort is pretty expensive,
> so there is a lot of overhead even before you take into account
> indirect calls.
>
> For bsearch(), switching to an open-coded binary search would keep
> the same complexity O(log n), only saving the indirect calls.  It
> may save a few clock cycles, but it's probably even more effective
> to keep a simple one-element cache for the most recently used item,
> and then fall back to bsearch().

Thanks for the elaboration.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function
  2018-01-16 13:34 [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function Gal Hammer
                   ` (2 preceding siblings ...)
  2018-01-25  7:48 ` Wanpeng Li
@ 2018-02-14 18:11 ` Paolo Bonzini
  3 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2018-02-14 18:11 UTC (permalink / raw)
  To: Gal Hammer, kvm; +Cc: rkrcmar

On 16/01/2018 14:34, Gal Hammer wrote:
> The loading time of a VM is quite significant with a CPU usage
> reaching 100% when loading a VM that its virtio devices use a
> large amount of virt-queues (e.g. a virtio-serial device with
> max_ports=511). Most of the time is spend in re-sorting the
> kvm_io_bus kvm_io_range array when a new eventfd is registered.
> 
> The patch replaces the existing method with an insert sort.
> 
> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Uri Lublin <ulublin@redhat.com>
> Signed-off-by: Gal Hammer <ghammer@redhat.com>
> ---
>  virt/kvm/kvm_main.c | 36 ++++++++++++++++++------------------
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 210bf82..1e467fc 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3419,21 +3419,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
>  	return kvm_io_bus_cmp(p1, p2);
>  }
>  
> -static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
> -			  gpa_t addr, int len)
> -{
> -	bus->range[bus->dev_count++] = (struct kvm_io_range) {
> -		.addr = addr,
> -		.len = len,
> -		.dev = dev,
> -	};
> -
> -	sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
> -		kvm_io_bus_sort_cmp, NULL);
> -
> -	return 0;
> -}
> -
>  static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
>  			     gpa_t addr, int len)
>  {
> @@ -3574,7 +3559,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
>  int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>  			    int len, struct kvm_io_device *dev)
>  {
> +	int i;
>  	struct kvm_io_bus *new_bus, *bus;
> +	struct kvm_io_range range;
>  
>  	bus = kvm_get_bus(kvm, bus_idx);
>  	if (!bus)
> @@ -3588,9 +3575,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>  			  sizeof(struct kvm_io_range)), GFP_KERNEL);
>  	if (!new_bus)
>  		return -ENOMEM;
> -	memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
> -	       sizeof(struct kvm_io_range)));
> -	kvm_io_bus_insert_dev(new_bus, dev, addr, len);
> +
> +	range = (struct kvm_io_range) {
> +		.addr = addr,
> +		.len = len,
> +		.dev = dev,
> +	};
> +
> +	for (i = 0; i < bus->dev_count; i++)
> +		if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
> +			break;
> +
> +	memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
> +	new_bus->dev_count++;
> +	new_bus->range[i] = range;
> +	memcpy(new_bus->range + i + 1, bus->range + i,
> +		(bus->dev_count - i) * sizeof(struct kvm_io_range));
>  	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
>  	synchronize_srcu_expedited(&kvm->srcu);
>  	kfree(bus);
> 

Queued, thanks.

Paolo

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-02-14 18:11 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-16 13:34 [PATCH] kvm: use insert sort in kvm_io_bus_register_dev function Gal Hammer
2018-01-16 13:45 ` Paolo Bonzini
2018-01-16 17:36 ` David Hildenbrand
     [not found]   ` <CAA2ifQzRGjEeEPiQmsqF6ou-oqsv5Px=i9KyxdnUfu80Ae_O1w@mail.gmail.com>
2018-01-17  9:11     ` David Hildenbrand
2018-01-25  7:48 ` Wanpeng Li
2018-01-25 14:53   ` Paolo Bonzini
2018-01-26  6:10     ` Wanpeng Li
2018-02-14 18:11 ` Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.