All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kvm: optimize ISR lookups
@ 2012-05-21 16:37 Michael S. Tsirkin
  2012-05-21 18:44 ` Michael S. Tsirkin
  2012-05-21 21:04 ` Thomas Gleixner
  0 siblings, 2 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-21 16:37 UTC (permalink / raw)
  To: kvm
  Cc: Avi Kivity, Marcelo Tosatti, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

We perform ISR lookups twice: during interrupt
injection and on EOI. Typical workloads only have
a single bit set there. So we can avoid ISR scans by
1. counting bits as we set/clear them in ISR
2. if count is 1, caching the vector number
3. if count != 1, invalidating the cache

The real purpose of this is enabling PV EOI
which needs to quickly validate the vector.
But non PV guests also benefit.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 arch/x86/kvm/lapic.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/lapic.h |    2 +
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 93c1574..232950a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -107,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap)
 	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+static inline int __apic_test_and_set_vector(int vec, void *bitmap)
+{
+	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
+{
+	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
 static inline int apic_hw_enabled(struct kvm_lapic *apic)
 {
 	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
@@ -210,6 +220,16 @@ static int find_highest_vector(void *bitmap)
 		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
 }
 
+static u8 count_vectors(void *bitmap)
+{
+	u32 *word = bitmap;
+	int word_offset;
+	u8 count = 0;
+	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
+		count += hweight32(word[word_offset << 2]);
+	return count;
+}
+
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
 	apic->irr_pending = true;
@@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 		apic->irr_pending = true;
 }
 
+static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
+{
+	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
+		++apic->isr_count;
+	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
+	if (likely(apic->isr_count == 1))
+		apic->isr_cache = vec;
+	else
+		apic->isr_cache = -1;
+}
+
+static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
+{
+	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+		--apic->isr_count;
+	ASSERT(apic->isr_count < 0);
+	apic->isr_cache = -1;
+}
+
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
@@ -273,6 +312,10 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 {
 	int result;
+	if (!apic->isr_count)
+		return -1;
+	if (likely(apic->isr_cache != -1))
+		return apic->isr_cache;
 
 	result = find_highest_vector(apic->regs + APIC_ISR);
 	ASSERT(result == -1 || result >= 16);
@@ -492,7 +535,7 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 	if (vector == -1)
 		return;
 
-	apic_clear_vector(vector, apic->regs + APIC_ISR);
+	apic_clear_isr(vector, apic);
 	apic_update_ppr(apic);
 
 	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
@@ -1081,6 +1124,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
 	apic->irr_pending = false;
+	apic->isr_count = 0;
+	apic->isr_cache = -1;
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
 	if (kvm_vcpu_is_bsp(vcpu))
@@ -1248,7 +1293,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	if (vector == -1)
 		return -1;
 
-	apic_set_vector(vector, apic->regs + APIC_ISR);
+	apic_set_isr(vector, apic);
 	apic_update_ppr(apic);
 	apic_clear_irr(vector, apic);
 	return vector;
@@ -1267,6 +1312,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 	update_divide_count(apic);
 	start_apic_timer(apic);
 	apic->irr_pending = true;
+	apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+	apic->isr_cache = -1;
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6f4ce25..9f8deff 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -13,6 +13,8 @@ struct kvm_lapic {
 	u32 divide_count;
 	struct kvm_vcpu *vcpu;
 	bool irr_pending;
+	s16 isr_count;
+	int isr_cache;
 	void *regs;
 	gpa_t vapic_addr;
 	struct page *vapic_page;
-- 
MST

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 16:37 [PATCH] kvm: optimize ISR lookups Michael S. Tsirkin
@ 2012-05-21 18:44 ` Michael S. Tsirkin
  2012-05-21 21:04 ` Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-21 18:44 UTC (permalink / raw)
  To: kvm
  Cc: Avi Kivity, Marcelo Tosatti, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On Mon, May 21, 2012 at 07:37:27PM +0300, Michael S. Tsirkin wrote:
> We perform ISR lookups twice: during interrupt
> injection and on EOI. Typical workloads only have
> a single bit set there. So we can avoid ISR scans by
> 1. counting bits as we set/clear them in ISR
> 2. if count is 1, caching the vector number
> 3. if count != 1, invalidating the cache
> 
> The real purpose of this is enabling PV EOI
> which needs to quickly validate the vector.
> But non PV guests also benefit.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

BTW host to guest netperf result (which is 100% CPU bound)
seems to increase pretty drastically:

before

TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.0.0.104
(11.0.0.104) port 0 AF_INET
Recv   Send    Send                          Utilization       Service
Demand
Socket Socket  Message  Elapsed              Send     Recv     Send
Recv
Size   Size    Size     Time     Throughput  local    remote   local
remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB
us/KB

 87380  16384  16384    10.01      7905.79   11.26    99.50    2.801 1.031  

after

TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.0.0.104
(11.0.0.104) port 0 AF_INET
Recv   Send    Send                          Utilization       Service
Demand
Socket Socket  Message  Elapsed              Send     Recv     Send
Recv
Size   Size    Size     Time     Throughput  local    remote   local
remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB
us/KB

 87380  16384  16384    10.01      8826.59   11.36    99.20    2.531 0.921  

> ---
>  arch/x86/kvm/lapic.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/lapic.h |    2 +
>  2 files changed, 51 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 93c1574..232950a 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -107,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap)
>  	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>  }
>  
> +static inline int __apic_test_and_set_vector(int vec, void *bitmap)
> +{
> +	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> +}
> +
> +static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
> +{
> +	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> +}
> +
>  static inline int apic_hw_enabled(struct kvm_lapic *apic)
>  {
>  	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
> @@ -210,6 +220,16 @@ static int find_highest_vector(void *bitmap)
>  		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
>  }
>  
> +static u8 count_vectors(void *bitmap)
> +{
> +	u32 *word = bitmap;
> +	int word_offset;
> +	u8 count = 0;
> +	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
> +		count += hweight32(word[word_offset << 2]);
> +	return count;
> +}
> +
>  static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
>  {
>  	apic->irr_pending = true;
> @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
>  		apic->irr_pending = true;
>  }
>  
> +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> +{
> +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> +		++apic->isr_count;
> +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> +	if (likely(apic->isr_count == 1))
> +		apic->isr_cache = vec;
> +	else
> +		apic->isr_cache = -1;
> +}
> +
> +static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
> +{
> +	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
> +		--apic->isr_count;
> +	ASSERT(apic->isr_count < 0);
> +	apic->isr_cache = -1;
> +}
> +
>  int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_lapic *apic = vcpu->arch.apic;
> @@ -273,6 +312,10 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
>  static inline int apic_find_highest_isr(struct kvm_lapic *apic)
>  {
>  	int result;
> +	if (!apic->isr_count)
> +		return -1;
> +	if (likely(apic->isr_cache != -1))
> +		return apic->isr_cache;
>  
>  	result = find_highest_vector(apic->regs + APIC_ISR);
>  	ASSERT(result == -1 || result >= 16);
> @@ -492,7 +535,7 @@ static void apic_set_eoi(struct kvm_lapic *apic)
>  	if (vector == -1)
>  		return;
>  
> -	apic_clear_vector(vector, apic->regs + APIC_ISR);
> +	apic_clear_isr(vector, apic);
>  	apic_update_ppr(apic);
>  
>  	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
> @@ -1081,6 +1124,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
>  		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
>  	}
>  	apic->irr_pending = false;
> +	apic->isr_count = 0;
> +	apic->isr_cache = -1;
>  	update_divide_count(apic);
>  	atomic_set(&apic->lapic_timer.pending, 0);
>  	if (kvm_vcpu_is_bsp(vcpu))
> @@ -1248,7 +1293,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
>  	if (vector == -1)
>  		return -1;
>  
> -	apic_set_vector(vector, apic->regs + APIC_ISR);
> +	apic_set_isr(vector, apic);
>  	apic_update_ppr(apic);
>  	apic_clear_irr(vector, apic);
>  	return vector;
> @@ -1267,6 +1312,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
>  	update_divide_count(apic);
>  	start_apic_timer(apic);
>  	apic->irr_pending = true;
> +	apic->isr_count = count_vectors(apic->regs + APIC_ISR);
> +	apic->isr_cache = -1;
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>  }
>  
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 6f4ce25..9f8deff 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -13,6 +13,8 @@ struct kvm_lapic {
>  	u32 divide_count;
>  	struct kvm_vcpu *vcpu;
>  	bool irr_pending;
> +	s16 isr_count;
> +	int isr_cache;
>  	void *regs;
>  	gpa_t vapic_addr;
>  	struct page *vapic_page;
> -- 
> MST

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 16:37 [PATCH] kvm: optimize ISR lookups Michael S. Tsirkin
  2012-05-21 18:44 ` Michael S. Tsirkin
@ 2012-05-21 21:04 ` Thomas Gleixner
  2012-05-21 21:51   ` Michael S. Tsirkin
                     ` (2 more replies)
  1 sibling, 3 replies; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-21 21:04 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Mon, 21 May 2012, Michael S. Tsirkin wrote:

> We perform ISR lookups twice: during interrupt
> injection and on EOI. Typical workloads only have
> a single bit set there. So we can avoid ISR scans by
> 1. counting bits as we set/clear them in ISR
> 2. if count is 1, caching the vector number
> 3. if count != 1, invalidating the cache
> 
> The real purpose of this is enabling PV EOI
> which needs to quickly validate the vector.
> But non PV guests also benefit.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  arch/x86/kvm/lapic.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/lapic.h |    2 +
>  2 files changed, 51 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 93c1574..232950a 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -107,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap)
>  	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>  }
>  
> +static inline int __apic_test_and_set_vector(int vec, void *bitmap)
> +{
> +	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> +}
> +
> +static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
> +{
> +	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> +}
> +
>  static inline int apic_hw_enabled(struct kvm_lapic *apic)
>  {
>  	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
> @@ -210,6 +220,16 @@ static int find_highest_vector(void *bitmap)
>  		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
>  }
>  
> +static u8 count_vectors(void *bitmap)
> +{
> +	u32 *word = bitmap;
> +	int word_offset;
> +	u8 count = 0;
> +	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
> +		count += hweight32(word[word_offset << 2]);
> +	return count;
> +}

Why do you need to reimplement bitmap_weight()?

Because your bitmap is not a bitmap, but a set of 32bit registers
which have an offset of 4 words each. I really had to twist my brain
around this function and the test_and_clr/set ones above just because
the name bitmap is so horribly misleading. Add an extra bonus for
using void pointers.

Yes, I know, the existing code is full of this, but that's not an
excuse to add more of it.

This emulation is just silly. Nothing in an emulation where the access
to the emulated hardware is implemented with vmexits is requiring a
1:1 memory layout. It's completely irrelevant whether the hardware has
an ISR regs offset of 0x10 or not. Nothing prevents the emulation to
use a consecutive bitmap for the vector bits instead of reimplementing
a boatload of bitmap operations.

The only justification for having the same layout as the actual
hardware is when you are going to map the memory into the guest space,
which is not the case here.

And if you look deeper, then you'll notice that _ALL_ APIC registers
are on a 16 byte boundary (thanks Peter for pointing that out).

So it's even more silly to have a 1:1 representation instead of
implementing the default emulated apic_read/write functions to access
(offset >> 2).

And of course, that design decision causes lookups to be slow. It's
way faster to scan a consecutive bitmap than iterating through eight
32bit words with an offset of 0x10, especially on a 64 bit host.

>  static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
>  {
>  	apic->irr_pending = true;
> @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
>  		apic->irr_pending = true;
>  }
>  
> +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> +{
> +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> +		++apic->isr_count;
> +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);

I'm really curious what you observed when defining DEBUG in that file.

Clearly you never did.

> +	if (likely(apic->isr_count == 1))
> +		apic->isr_cache = vec;
> +	else
> +		apic->isr_cache = -1;
> +}
> +
> +static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
> +{
> +	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
> +		--apic->isr_count;
> +	ASSERT(apic->isr_count < 0);

Ditto.

>  	result = find_highest_vector(apic->regs + APIC_ISR);
>  	ASSERT(result == -1 || result >= 16);

And obviously none of the folks who added this gem bothered to define
DEBUG either.

So please instead of working around horrid design decisions and adding
more mess to the existing one, can we please cleanup the stuff first
and then decide whether it's worth to add the extra magic?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 21:04 ` Thomas Gleixner
@ 2012-05-21 21:51   ` Michael S. Tsirkin
  2012-05-21 22:14     ` Thomas Gleixner
  2012-05-21 23:06   ` Michael S. Tsirkin
  2012-05-22 10:59   ` Avi Kivity
  2 siblings, 1 reply; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-21 21:51 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> >  		apic->irr_pending = true;
> >  }
> >  
> > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > +{
> > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > +		++apic->isr_count;
> > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> 
> I'm really curious what you observed when defining DEBUG in that file.
> 
> Clearly you never did.

Sorry :(
Yes clearly silly, thanks for pointing this out.

> > +	if (likely(apic->isr_count == 1))
> > +		apic->isr_cache = vec;
> > +	else
> > +		apic->isr_cache = -1;
> > +}
> > +
> > +static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
> > +{
> > +	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
> > +		--apic->isr_count;
> > +	ASSERT(apic->isr_count < 0);
> 
> Ditto.
> 
> >  	result = find_highest_vector(apic->regs + APIC_ISR);
> >  	ASSERT(result == -1 || result >= 16);
> 
> And obviously none of the folks who added this gem bothered to define
> DEBUG either.
> 
> So please instead of working around horrid design decisions and adding
> more mess to the existing one, can we please cleanup the stuff first
> and then decide whether it's worth to add the extra magic?
> 
> Thanks,
> 
> 	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 21:51   ` Michael S. Tsirkin
@ 2012-05-21 22:14     ` Thomas Gleixner
  2012-05-21 22:24       ` Michael S. Tsirkin
  0 siblings, 1 reply; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-21 22:14 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Tue, 22 May 2012, Michael S. Tsirkin wrote:

> On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> > >  		apic->irr_pending = true;
> > >  }
> > >  
> > > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > > +{
> > > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > > +		++apic->isr_count;
> > > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> > 
> > I'm really curious what you observed when defining DEBUG in that file.
> > 
> > Clearly you never did.
> 
> Sorry :(
> Yes clearly silly, thanks for pointing this out.

That's all you have a reply for? That's the least of the problems ....

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 22:14     ` Thomas Gleixner
@ 2012-05-21 22:24       ` Michael S. Tsirkin
  2012-05-21 22:44         ` Thomas Gleixner
                           ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-21 22:24 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Tue, May 22, 2012 at 12:14:18AM +0200, Thomas Gleixner wrote:
> On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> 
> > On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > > > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> > > >  		apic->irr_pending = true;
> > > >  }
> > > >  
> > > > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > > > +{
> > > > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > > > +		++apic->isr_count;
> > > > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> > > 
> > > I'm really curious what you observed when defining DEBUG in that file.
> > > 
> > > Clearly you never did.
> > 
> > Sorry :(
> > Yes clearly silly, thanks for pointing this out.
> 
> That's all you have a reply for? That's the least of the problems ....

Others are not my fault :)

Seriously, if Avi/Marcelo want to rewrite the ISR emulation
I can help. If they want to keep it 1:1 with hardware
then what I wrote seems the only way.

So waiting for them to respond.

-- 
MST

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 22:24       ` Michael S. Tsirkin
@ 2012-05-21 22:44         ` Thomas Gleixner
  2012-05-21 22:50           ` Michael S. Tsirkin
  2012-05-21 23:01         ` Thomas Gleixner
  2012-05-21 23:11         ` Thomas Gleixner
  2 siblings, 1 reply; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-21 22:44 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

Michael,

On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> On Tue, May 22, 2012 at 12:14:18AM +0200, Thomas Gleixner wrote:
> > On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> > 
> > > On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > > > > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> > > > >  		apic->irr_pending = true;
> > > > >  }
> > > > >  
> > > > > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > > > > +{
> > > > > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > > > > +		++apic->isr_count;
> > > > > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> > > > 
> > > > I'm really curious what you observed when defining DEBUG in that file.
> > > > 
> > > > Clearly you never did.
> > > 
> > > Sorry :(
> > > Yes clearly silly, thanks for pointing this out.
> > 
> > That's all you have a reply for? That's the least of the problems ....
> 
> Others are not my fault :)

Interesting. The other changes you added are not your fault?

So you provided a patch which you didn't author completely. You merily
added the bogus ASSERTs, right ?

Can you please explain why there is only your SOB on that patch?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 22:44         ` Thomas Gleixner
@ 2012-05-21 22:50           ` Michael S. Tsirkin
  0 siblings, 0 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-21 22:50 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Tue, May 22, 2012 at 12:44:39AM +0200, Thomas Gleixner wrote:
> Michael,
> 
> On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> > On Tue, May 22, 2012 at 12:14:18AM +0200, Thomas Gleixner wrote:
> > > On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> > > 
> > > > On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > > > > > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> > > > > >  		apic->irr_pending = true;
> > > > > >  }
> > > > > >  
> > > > > > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > > > > > +{
> > > > > > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > > > > > +		++apic->isr_count;
> > > > > > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> > > > > 
> > > > > I'm really curious what you observed when defining DEBUG in that file.
> > > > > 
> > > > > Clearly you never did.
> > > > 
> > > > Sorry :(
> > > > Yes clearly silly, thanks for pointing this out.
> > > 
> > > That's all you have a reply for? That's the least of the problems ....
> > 
> > Others are not my fault :)
> 
> Interesting. The other changes you added are not your fault?
> 
> So you provided a patch which you didn't author completely. You merily
> added the bogus ASSERTs, right ?
> 
> Can you please explain why there is only your SOB on that patch?
> 
> Thanks,
> 
> 	tglx

I authored the patch, sorry if I didn't make it clear:
the bitmap data-structure that you noted as inefficient
is part of Linux, I merely keep it as it is upstream.

I'll reply to the original mail, it's easier.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 22:24       ` Michael S. Tsirkin
  2012-05-21 22:44         ` Thomas Gleixner
@ 2012-05-21 23:01         ` Thomas Gleixner
  2012-05-22 10:46           ` Avi Kivity
  2012-05-21 23:11         ` Thomas Gleixner
  2 siblings, 1 reply; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-21 23:01 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> On Tue, May 22, 2012 at 12:14:18AM +0200, Thomas Gleixner wrote:
> > On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> > 
> > > On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > > > > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> > > > >  		apic->irr_pending = true;
> > > > >  }
> > > > >  
> > > > > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > > > > +{
> > > > > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > > > > +		++apic->isr_count;
> > > > > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> > > > 
> > > > I'm really curious what you observed when defining DEBUG in that file.
> > > > 
> > > > Clearly you never did.
> > > 
> > > Sorry :(
> > > Yes clearly silly, thanks for pointing this out.
> > 
> > That's all you have a reply for? That's the least of the problems ....
> 
> Others are not my fault :)
> 
> Seriously, if Avi/Marcelo want to rewrite the ISR emulation

Interesting POV, really.

Did you ever notice that the kernel is a collaborative effort and not
controlled by "Avi/Marcelo"?

Did you ever notice that arch/x86/kvm is part of arch/x86? 

And if you did, did you notice that there are maintainers responsible
for that code base?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 21:04 ` Thomas Gleixner
  2012-05-21 21:51   ` Michael S. Tsirkin
@ 2012-05-21 23:06   ` Michael S. Tsirkin
  2012-05-21 23:12     ` H. Peter Anvin
  2012-05-21 23:36     ` Thomas Gleixner
  2012-05-22 10:59   ` Avi Kivity
  2 siblings, 2 replies; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-21 23:06 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> On Mon, 21 May 2012, Michael S. Tsirkin wrote:
> 
> > We perform ISR lookups twice: during interrupt
> > injection and on EOI. Typical workloads only have
> > a single bit set there. So we can avoid ISR scans by
> > 1. counting bits as we set/clear them in ISR
> > 2. if count is 1, caching the vector number
> > 3. if count != 1, invalidating the cache
> > 
> > The real purpose of this is enabling PV EOI
> > which needs to quickly validate the vector.
> > But non PV guests also benefit.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  arch/x86/kvm/lapic.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >  arch/x86/kvm/lapic.h |    2 +
> >  2 files changed, 51 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 93c1574..232950a 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -107,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap)
> >  	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >  }
> >  
> > +static inline int __apic_test_and_set_vector(int vec, void *bitmap)
> > +{
> > +	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> > +}
> > +
> > +static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
> > +{
> > +	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> > +}
> > +
> >  static inline int apic_hw_enabled(struct kvm_lapic *apic)
> >  {
> >  	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
> > @@ -210,6 +220,16 @@ static int find_highest_vector(void *bitmap)
> >  		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
> >  }
> >  
> > +static u8 count_vectors(void *bitmap)
> > +{
> > +	u32 *word = bitmap;
> > +	int word_offset;
> > +	u8 count = 0;
> > +	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
> > +		count += hweight32(word[word_offset << 2]);
> > +	return count;
> > +}
> 
> Why do you need to reimplement bitmap_weight()?
> 
> Because your bitmap is not a bitmap,

It's an existing "bitmap". It's not my bitmap :)

> but a set of 32bit registers
> which have an offset of 4 words each. I really had to twist my brain
> around this function and the test_and_clr/set ones above just because
> the name bitmap is so horribly misleading. Add an extra bonus for
> using void pointers.
> 
> Yes, I know, the existing code is full of this, but that's not an
> excuse to add more of it.

There's no other way to use existing code. If current code
is reworked to use bitmap.h then my patch can use it too.


> This emulation is just silly. Nothing in an emulation where the access
> to the emulated hardware is implemented with vmexits is requiring a
> 1:1 memory layout. It's completely irrelevant whether the hardware has
> an ISR regs offset of 0x10 or not. Nothing prevents the emulation to
> use a consecutive bitmap for the vector bits instead of reimplementing
> a boatload of bitmap operations.
> 
> The only justification for having the same layout as the actual
> hardware is when you are going to map the memory into the guest space,
> which is not the case here.

I think the reason is __apic_read which now simply copies the registers
out to guest, this code will become less straight-forward if it's not
1:1.

> And if you look deeper, then you'll notice that _ALL_ APIC registers
> are on a 16 byte boundary (thanks Peter for pointing that out).
> 
> So it's even more silly to have a 1:1 representation instead of
> implementing the default emulated apic_read/write functions to access
> (offset >> 2).
> 
> And of course, that design decision causes lookups to be slow.

Yes, it might be one of the reasons why my patch helps so
much: it adds a cache in front of this data structure.

> It's
> way faster to scan a consecutive bitmap than iterating through eight
> 32bit words with an offset of 0x10, especially on a 64 bit host.
>
> >  static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
> >  {
> >  	apic->irr_pending = true;
> > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> >  		apic->irr_pending = true;
> >  }
> >  
> > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > +{
> > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > +		++apic->isr_count;
> > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> 
> I'm really curious what you observed when defining DEBUG in that file.
> 
> Clearly you never did.
> 
> > +	if (likely(apic->isr_count == 1))
> > +		apic->isr_cache = vec;
> > +	else
> > +		apic->isr_cache = -1;
> > +}
> > +
> > +static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
> > +{
> > +	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
> > +		--apic->isr_count;
> > +	ASSERT(apic->isr_count < 0);
> 
> Ditto.
> 
> >  	result = find_highest_vector(apic->regs + APIC_ISR);
> >  	ASSERT(result == -1 || result >= 16);
> 
> And obviously none of the folks who added this gem bothered to define
> DEBUG either.
> 
> So please instead of working around horrid design decisions and adding
> more mess to the existing one, can we please cleanup the stuff first
> and then decide whether it's worth to add the extra magic?
> 
> Thanks,
> 
> 	tglx


So what you propose is in fact to rework apic registers at least for
ISR,IRR,TMR to use a bitmap.
I am fine with this suggestion but would like some feedback from kvm
maintainers on where they want to go before I spend time on that.

-- 
MST

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 22:24       ` Michael S. Tsirkin
  2012-05-21 22:44         ` Thomas Gleixner
  2012-05-21 23:01         ` Thomas Gleixner
@ 2012-05-21 23:11         ` Thomas Gleixner
  2 siblings, 0 replies; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-21 23:11 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> On Tue, May 22, 2012 at 12:14:18AM +0200, Thomas Gleixner wrote:
> > On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> > 
> > > On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > > > > @@ -242,6 +262,25 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
> > > > >  		apic->irr_pending = true;
> > > > >  }
> > > > >  
> > > > > +static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
> > > > > +{
> > > > > +	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
> > > > > +		++apic->isr_count;
> > > > > +	ASSERT(apic->isr_count > MAX_APIC_VECTOR);
> > > > 
> > > > I'm really curious what you observed when defining DEBUG in that file.
> > > > 
> > > > Clearly you never did.
> > > 
> > > Sorry :(
> > > Yes clearly silly, thanks for pointing this out.
> > 
> > That's all you have a reply for? That's the least of the problems ....
> 
> Others are not my fault :)
> 
> Seriously, if Avi/Marcelo want to rewrite the ISR emulation
> I can help. If they want to keep it 1:1 with hardware
> then what I wrote seems the only way.

Seriously. You submitted a code monkey patch, solving a problem by
curing the symptom, but not the root cause.

And then you hide behind Avi and Marcelo?

Did you ever think about the real problem of that lapic emulation?

Let's assume you did and it occured to you that the whole thing is
wrong and backwards, then you still insist on adding more bullshit to
that file instead of optimizing and fixing it in the first place?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 23:06   ` Michael S. Tsirkin
@ 2012-05-21 23:12     ` H. Peter Anvin
  2012-05-21 23:36     ` Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: H. Peter Anvin @ 2012-05-21 23:12 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Thomas Gleixner, kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar,
	x86, linux-kernel

On 05/21/2012 04:06 PM, Michael S. Tsirkin wrote:
> 
> I think the reason is __apic_read which now simply copies the registers
> out to guest, this code will become less straight-forward if it's not
> 1:1.
> 

It can still be 1:1, just drop the 12 bytes of completely useless
padding after each 32-bit datum.

>> And if you look deeper, then you'll notice that _ALL_ APIC registers
>> are on a 16 byte boundary (thanks Peter for pointing that out).
>>
>> So it's even more silly to have a 1:1 representation instead of
>> implementing the default emulated apic_read/write functions to access
>> (offset >> 2).
>>
>> And of course, that design decision causes lookups to be slow.
> 
> Yes, it might be one of the reasons why my patch helps so
> much: it adds a cache in front of this data structure.
> 

Well, *fix the fscking data structure first*.

> 
> So what you propose is in fact to rework apic registers at least for
> ISR,IRR,TMR to use a bitmap.
> I am fine with this suggestion but would like some feedback from kvm
> maintainers on where they want to go before I spend time on that.
> 

This should be a 20-minute hack.

	-hpa

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 23:06   ` Michael S. Tsirkin
  2012-05-21 23:12     ` H. Peter Anvin
@ 2012-05-21 23:36     ` Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-21 23:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, Avi Kivity, Marcelo Tosatti, Ingo Molnar, H. Peter Anvin,
	x86, linux-kernel

On Tue, 22 May 2012, Michael S. Tsirkin wrote:
> On Mon, May 21, 2012 at 11:04:25PM +0200, Thomas Gleixner wrote:
> > On Mon, 21 May 2012, Michael S. Tsirkin wrote:
> > > +static u8 count_vectors(void *bitmap)
> > > +{
> > > +	u32 *word = bitmap;
> > > +	int word_offset;
> > > +	u8 count = 0;
> > > +	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
> > > +		count += hweight32(word[word_offset << 2]);
> > > +	return count;
> > > +}
> > 
> > Why do you need to reimplement bitmap_weight()?
> > 
> > Because your bitmap is not a bitmap,
> 
> It's an existing "bitmap". It's not my bitmap :)

Dammit. You added a function:

+static u8 count_vectors(void *bitmap)

So it's YOUR bitmap. It's YOUR fault that you copied mindlessly the
existing crap.

And you just copied it to push performance without even thinking about
the underlying problems.

And now you expect me to accept this just because someone else missed
to use his brain when implementing the exisiting nonsense ?

> > Yes, I know, the existing code is full of this, but that's not an
> > excuse to add more of it.
> 
> There's no other way to use existing code. If current code
> is reworked to use bitmap.h then my patch can use it too.

Then sit down and rework the existing code instead of insisting on
something which makes the code worse than it is already.

> > This emulation is just silly. Nothing in an emulation where the access
> > to the emulated hardware is implemented with vmexits is requiring a
> > 1:1 memory layout. It's completely irrelevant whether the hardware has
> > an ISR regs offset of 0x10 or not. Nothing prevents the emulation to
> > use a consecutive bitmap for the vector bits instead of reimplementing
> > a boatload of bitmap operations.
> > 
> > The only justification for having the same layout as the actual
> > hardware is when you are going to map the memory into the guest space,
> > which is not the case here.
> 
> I think the reason is __apic_read which now simply copies the registers
> out to guest, this code will become less straight-forward if it's not
> 1:1.

Oh yes. You didn't even read the few lines below, which explain what's
wrong and why the existing code is less straight forward than
optimized code.
 
> > And if you look deeper, then you'll notice that _ALL_ APIC registers
> > are on a 16 byte boundary (thanks Peter for pointing that out).
> > 
> > So it's even more silly to have a 1:1 representation instead of
> > implementing the default emulated apic_read/write functions to access
> > (offset >> 2).
> > 
> > And of course, that design decision causes lookups to be slow.
> 
> Yes, it might be one of the reasons why my patch helps so
> much: it adds a cache in front of this data structure.

Obviously you didn't even try to answer my obresvations/questions, you
are just justfying your hackery.

> So what you propose is in fact to rework apic registers at least for
> ISR,IRR,TMR to use a bitmap.

This is the final confirmation that you never tried to understand my
reply. Hint: "s/at least.*//"

> I am fine with this suggestion but would like some feedback from kvm
> maintainers on where they want to go before I spend time on that.

Are the kvm maintainers controlling your common sense or what ?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 23:01         ` Thomas Gleixner
@ 2012-05-22 10:46           ` Avi Kivity
  2012-05-23 14:48             ` Ingo Molnar
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2012-05-22 10:46 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Michael S. Tsirkin, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On 05/22/2012 02:01 AM, Thomas Gleixner wrote:
> > 
> > Others are not my fault :)
> > 
> > Seriously, if Avi/Marcelo want to rewrite the ISR emulation
>
> Interesting POV, really.
>
> Did you ever notice that the kernel is a collaborative effort and not
> controlled by "Avi/Marcelo"?
>
> Did you ever notice that arch/x86/kvm is part of arch/x86? 

This is silly.  Most of the time the kernel is advanced by incremental
patches.  Sometimes it is advanced by minor or major refactoring.  It is
never advanced by personal attacks on contributors.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-21 21:04 ` Thomas Gleixner
  2012-05-21 21:51   ` Michael S. Tsirkin
  2012-05-21 23:06   ` Michael S. Tsirkin
@ 2012-05-22 10:59   ` Avi Kivity
  2012-05-22 17:26     ` Thomas Gleixner
  2 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2012-05-22 10:59 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Michael S. Tsirkin, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On 05/22/2012 12:04 AM, Thomas Gleixner wrote:
> >  
> > +static u8 count_vectors(void *bitmap)
> > +{
> > +	u32 *word = bitmap;
> > +	int word_offset;
> > +	u8 count = 0;
> > +	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
> > +		count += hweight32(word[word_offset << 2]);
> > +	return count;
> > +}
>
> Why do you need to reimplement bitmap_weight()?
>
> Because your bitmap is not a bitmap, but a set of 32bit registers
> which have an offset of 4 words each. I really had to twist my brain
> around this function and the test_and_clr/set ones above just because
> the name bitmap is so horribly misleading. Add an extra bonus for
> using void pointers.
>
> Yes, I know, the existing code is full of this, but that's not an
> excuse to add more of it.
>
> This emulation is just silly. Nothing in an emulation where the access
> to the emulated hardware is implemented with vmexits is requiring a
> 1:1 memory layout. It's completely irrelevant whether the hardware has
> an ISR regs offset of 0x10 or not. Nothing prevents the emulation to
> use a consecutive bitmap for the vector bits instead of reimplementing
> a boatload of bitmap operations.
>
> The only justification for having the same layout as the actual
> hardware is when you are going to map the memory into the guest space,
> which is not the case here.
>
>

The APIC page is in fact mapped  to the hardware (not the guest, but vmx
microcode does access it).  Only one register, the TPR, is ever used. 
It's possible to re-layout the data structure so that the TPR stays in
the same place while everything else becomes contiguous, but we'll have
to do it again if the hardware starts mapping more registers.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-22 10:59   ` Avi Kivity
@ 2012-05-22 17:26     ` Thomas Gleixner
  2012-05-23 15:10       ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-22 17:26 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On Tue, 22 May 2012, Avi Kivity wrote:
> On 05/22/2012 12:04 AM, Thomas Gleixner wrote:
> > The only justification for having the same layout as the actual
> > hardware is when you are going to map the memory into the guest space,
> > which is not the case here.
>
> The APIC page is in fact mapped  to the hardware (not the guest, but vmx
> microcode does access it).  Only one register, the TPR, is ever used. 
> It's possible to re-layout the data structure so that the TPR stays in
> the same place while everything else becomes contiguous, but we'll have
> to do it again if the hardware starts mapping more registers.

I would avoid that by having a compressed version which reflects the
SW state and the mapped one which allows the vmx microcode to fiddle
with the TPR. If you need more registers in the HW page then you don't
have to worry about the layout and just have a proper accessor for
that.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-22 10:46           ` Avi Kivity
@ 2012-05-23 14:48             ` Ingo Molnar
  2012-05-23 15:03               ` Avi Kivity
  2012-05-23 15:14               ` Michael S. Tsirkin
  0 siblings, 2 replies; 28+ messages in thread
From: Ingo Molnar @ 2012-05-23 14:48 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Thomas Gleixner, Michael S. Tsirkin, kvm, Marcelo Tosatti,
	Ingo Molnar, H. Peter Anvin, x86, linux-kernel


* Avi Kivity <avi@redhat.com> wrote:

> On 05/22/2012 02:01 AM, Thomas Gleixner wrote:
> > > 
> > > Others are not my fault :)
> > > 
> > > Seriously, if Avi/Marcelo want to rewrite the ISR emulation
> >
> > Interesting POV, really.
> >
> > Did you ever notice that the kernel is a collaborative effort and not
> > controlled by "Avi/Marcelo"?
> >
> > Did you ever notice that arch/x86/kvm is part of arch/x86? 
> 
> This is silly.  Most of the time the kernel is advanced by 
> incremental patches.  Sometimes it is advanced by minor or 
> major refactoring.  It is never advanced by personal attacks 
> on contributors.

Thomas wasn't so much doing a personal attack, it was pointing 
out stupidity and then it was mocking the repeated stupidity. He 
very politely explained his point of view (with which I agree), 
and then you guys pressed the issue and there's just so many 
hours in the merge window, so you asked to be flamed ...

Avi, if you cannot be brought to properly reject incomplete 
patches going in the wrong direction then others maintainers 
interested in the code will do it.

If you start to consistently require from KVM contributors 
"incremental updates" in the right direction, not piling crap on 
crap, then such incidents won't happen. This isn't the first 
such incident but there's hope that it might be the last one.

The rule in arch/x86/ (and many other subsystems) is very 
simple: we don't speed up crappy code. If you want to speed it 
up then make it clean first, *then* is it suited for speedups. 
Crappy code is fragile and bound to introduce bugs, and crappy 
code leads to continued increased maintenance overhead, so 
crappy code is basically under a perpetual code freeze until 
it's uncrapped.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 14:48             ` Ingo Molnar
@ 2012-05-23 15:03               ` Avi Kivity
  2012-05-23 20:10                 ` Thomas Gleixner
  2012-05-23 15:14               ` Michael S. Tsirkin
  1 sibling, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2012-05-23 15:03 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, Michael S. Tsirkin, kvm, Marcelo Tosatti,
	Ingo Molnar, H. Peter Anvin, x86, linux-kernel

On 05/23/2012 05:48 PM, Ingo Molnar wrote:
>> 
>> This is silly.  Most of the time the kernel is advanced by 
>> incremental patches.  Sometimes it is advanced by minor or 
>> major refactoring.  It is never advanced by personal attacks 
>> on contributors.
> 
> Thomas wasn't so much doing a personal attack, it was pointing 
> out stupidity and then it was mocking the repeated stupidity. He 
> very politely explained his point of view (with which I agree), 

I guess we disagree on what is polite or not.  Mocking, for example,
isn't part of it in my book.

> and then you guys pressed the issue and there's just so many 
> hours in the merge window, so you asked to be flamed ...

There is a theory that flaming is a necessary part of kernel
development, but not all maintainers and developers agree with it.
Unfortunately many influential maintainers do.

> 
> Avi, if you cannot be brought to properly reject incomplete 
> patches going in the wrong direction then others maintainers 
> interested in the code will do it.

I happen not to think this is going in the wrong direction, and I
explained why.

> 
> If you start to consistently require from KVM contributors 
> "incremental updates" in the right direction, not piling crap on 
> crap, then such incidents won't happen. This isn't the first 
> such incident but there's hope that it might be the last one.

Feel free to point out, politely, when such things happen.  Even if you
don't think it will work for some reason, please try it out as an
experiment.

> The rule in arch/x86/ (and many other subsystems) is very 
> simple: we don't speed up crappy code. If you want to speed it 
> up then make it clean first, *then* is it suited for speedups. 
> Crappy code is fragile and bound to introduce bugs, and crappy 
> code leads to continued increased maintenance overhead, so 
> crappy code is basically under a perpetual code freeze until 
> it's uncrapped.

I agree with this as a general principle, but as it happens this
particular bit cannot be uncrapped due to hardware constraints.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-22 17:26     ` Thomas Gleixner
@ 2012-05-23 15:10       ` Avi Kivity
  2012-05-23 18:37         ` Thomas Gleixner
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2012-05-23 15:10 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Michael S. Tsirkin, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On 05/22/2012 08:26 PM, Thomas Gleixner wrote:
> On Tue, 22 May 2012, Avi Kivity wrote:
>> On 05/22/2012 12:04 AM, Thomas Gleixner wrote:
>> > The only justification for having the same layout as the actual
>> > hardware is when you are going to map the memory into the guest space,
>> > which is not the case here.
>>
>> The APIC page is in fact mapped  to the hardware (not the guest, but vmx
>> microcode does access it).  Only one register, the TPR, is ever used. 
>> It's possible to re-layout the data structure so that the TPR stays in
>> the same place while everything else becomes contiguous, but we'll have
>> to do it again if the hardware starts mapping more registers.
> 
> I would avoid that by having a compressed version which reflects the
> SW state and the mapped one which allows the vmx microcode to fiddle
> with the TPR. If you need more registers in the HW page then you don't
> have to worry about the layout and just have a proper accessor for
> that.

That works, but replaces one problem with another: now we have two
sources for the same data, and need to juggle between them depending on
register number (either synchronizing in both directions, or special
casing); so you're simplifying one thing at the expense of the other.
If the microcode starts accessing more registers, then having two
layouts becomes even uglier.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 14:48             ` Ingo Molnar
  2012-05-23 15:03               ` Avi Kivity
@ 2012-05-23 15:14               ` Michael S. Tsirkin
  2012-05-23 19:22                 ` H. Peter Anvin
  1 sibling, 1 reply; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-23 15:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Avi Kivity, Thomas Gleixner, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On Wed, May 23, 2012 at 04:48:46PM +0200, Ingo Molnar wrote:
> there's just so many hours in the merge window, so you asked to be
> flamed ...

I can handle flames just fine :) But I'll try to buffer x86 things up
until the window closes next time.

-- 
MST

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 15:10       ` Avi Kivity
@ 2012-05-23 18:37         ` Thomas Gleixner
  2012-05-23 19:25           ` H. Peter Anvin
  0 siblings, 1 reply; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-23 18:37 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Michael S. Tsirkin, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On Wed, 23 May 2012, Avi Kivity wrote:

> On 05/22/2012 08:26 PM, Thomas Gleixner wrote:
> > On Tue, 22 May 2012, Avi Kivity wrote:
> >> On 05/22/2012 12:04 AM, Thomas Gleixner wrote:
> >> > The only justification for having the same layout as the actual
> >> > hardware is when you are going to map the memory into the guest space,
> >> > which is not the case here.
> >>
> >> The APIC page is in fact mapped  to the hardware (not the guest, but vmx
> >> microcode does access it).  Only one register, the TPR, is ever used. 
> >> It's possible to re-layout the data structure so that the TPR stays in
> >> the same place while everything else becomes contiguous, but we'll have
> >> to do it again if the hardware starts mapping more registers.
> > 
> > I would avoid that by having a compressed version which reflects the
> > SW state and the mapped one which allows the vmx microcode to fiddle
> > with the TPR. If you need more registers in the HW page then you don't
> > have to worry about the layout and just have a proper accessor for
> > that.
> 
> That works, but replaces one problem with another: now we have two
> sources for the same data, and need to juggle between them depending on
> register number (either synchronizing in both directions, or special
> casing); so you're simplifying one thing at the expense of the other.
> If the microcode starts accessing more registers, then having two
> layouts becomes even uglier.

Fair enough :)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 15:14               ` Michael S. Tsirkin
@ 2012-05-23 19:22                 ` H. Peter Anvin
  0 siblings, 0 replies; 28+ messages in thread
From: H. Peter Anvin @ 2012-05-23 19:22 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Ingo Molnar, Avi Kivity, Thomas Gleixner, kvm, Marcelo Tosatti,
	Ingo Molnar, x86, linux-kernel

On 05/23/2012 08:14 AM, Michael S. Tsirkin wrote:
> On Wed, May 23, 2012 at 04:48:46PM +0200, Ingo Molnar wrote:
>> there's just so many hours in the merge window, so you asked to be
>> flamed ...
> 
> I can handle flames just fine :) But I'll try to buffer x86 things up
> until the window closes next time.
> 

It's generally a good rule for anything for beyond the merge window
(i.e. for 3.6 in this case.)

It is *way* too easy to get down rathole confusions otherwise.

	-hpa


-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 18:37         ` Thomas Gleixner
@ 2012-05-23 19:25           ` H. Peter Anvin
  2012-05-23 22:00             ` Thomas Gleixner
  0 siblings, 1 reply; 28+ messages in thread
From: H. Peter Anvin @ 2012-05-23 19:25 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Avi Kivity, Michael S. Tsirkin, kvm, Marcelo Tosatti,
	Ingo Molnar, x86, linux-kernel

On 05/23/2012 11:37 AM, Thomas Gleixner wrote:
>>
>> That works, but replaces one problem with another: now we have two
>> sources for the same data, and need to juggle between them depending on
>> register number (either synchronizing in both directions, or special
>> casing); so you're simplifying one thing at the expense of the other.
>> If the microcode starts accessing more registers, then having two
>> layouts becomes even uglier.
> 
> Fair enough :)

Yes, the µcode accessing this data structure directly probably falls
under the category of a legitimate need to stick to the hardware format.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 15:03               ` Avi Kivity
@ 2012-05-23 20:10                 ` Thomas Gleixner
  2012-05-23 20:46                   ` Michael S. Tsirkin
  0 siblings, 1 reply; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-23 20:10 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Ingo Molnar, Michael S. Tsirkin, kvm, Marcelo Tosatti,
	Ingo Molnar, H. Peter Anvin, x86, linux-kernel

On Wed, 23 May 2012, Avi Kivity wrote:
> On 05/23/2012 05:48 PM, Ingo Molnar wrote:
> >> 
> >> This is silly.  Most of the time the kernel is advanced by 
> >> incremental patches.  Sometimes it is advanced by minor or 
> >> major refactoring.  It is never advanced by personal attacks 
> >> on contributors.
> > 
> > Thomas wasn't so much doing a personal attack, it was pointing 
> > out stupidity and then it was mocking the repeated stupidity. He 
> > very politely explained his point of view (with which I agree), 
> 
> I guess we disagree on what is polite or not.  Mocking, for example,
> isn't part of it in my book.

I really avoid flaming as far as it goes, but I consider that ignoring
a thorough patch review and replying only on the very obvious problem
is a massive form of impoliteness. Replying on a still polite reminder
with a sloppy "I just took what's there and implemeted the
optimization which I was tasked with" is even more of an offense. It
clearly shows that there is no interest in making the whole thing
better and just aims for quick and dirty "here are my benchmark
results" success.

This is what actually triggered me to switch into grumpy mode.

I'm really tired of that attitude. It's the root cause for the steady
increasing mess in the kernel.

It forces people who actually care to waste an endless amount of time
to clean up what has been just slapped into the code base quick and
dirty. And you expect those people to stay calm and polite if they get
ignored and ridiculed with sloppy replies?

You may be a saint, but I'm definitely too old to cope with that.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 20:10                 ` Thomas Gleixner
@ 2012-05-23 20:46                   ` Michael S. Tsirkin
  2012-05-23 23:02                     ` Thomas Gleixner
  0 siblings, 1 reply; 28+ messages in thread
From: Michael S. Tsirkin @ 2012-05-23 20:46 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Avi Kivity, Ingo Molnar, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On Wed, May 23, 2012 at 10:10:27PM +0200, Thomas Gleixner wrote:
> Replying on a still polite reminder with a sloppy "I just took what's
> there and implemeted the optimization which I was tasked with" is even
> more of an offense.

Ow.  That 'not my fault' line was a joke.

-- 
MST

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 19:25           ` H. Peter Anvin
@ 2012-05-23 22:00             ` Thomas Gleixner
  2012-05-30 14:18               ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-23 22:00 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Avi Kivity, Michael S. Tsirkin, kvm, Marcelo Tosatti,
	Ingo Molnar, x86, linux-kernel

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3009 bytes --]

On Wed, 23 May 2012, H. Peter Anvin wrote:

> On 05/23/2012 11:37 AM, Thomas Gleixner wrote:
> >>
> >> That works, but replaces one problem with another: now we have two
> >> sources for the same data, and need to juggle between them depending on
> >> register number (either synchronizing in both directions, or special
> >> casing); so you're simplifying one thing at the expense of the other.
> >> If the microcode starts accessing more registers, then having two
> >> layouts becomes even uglier.
> > 
> > Fair enough :)
> 
> Yes, the µcode accessing this data structure directly probably falls
> under the category of a legitimate need to stick to the hardware format.

Thought more about that.

We have a clear distinction between HW accessed data and software
accessed data.

If I look at TPR then it is special cased already and it does:

   case APIC_TASKPRI:
                report_tpr_access(apic, false|true);
                /* fall thru */

And the fall through is using the general accessor for all not special
cased registers.

So all you have to do is 

   case APIC_TASKPRI:
                report_tpr_access(apic, false|true);
+		return access_mapped_reg(...);

Instead of the fall through.

So there is no synchronizing back and forth problem simply because you
already have a special case for that register.

I know you'll argue that the tpr reporting is a special hack for
windows guests, at least that's what the changelog tells.

But even if we have a few more registers accessed by hardware and if
they do not require a special casing, I really doubt that the overhead
of special casing those few regs will be worse than not having the
obvious optimization in place.

And looking deeper it's a total non issue. The apic mapping is 4k. The
register stride is strictly 0x10. That makes a total of 256 possible
registers.

So now you have two possibilites:

1) Create a 256 bit == 64byte bitfield to select the one or the other
   representation.

   The overhead of checking the bit is not going to be observable.

2) Create a 256 function pointer array == 2k resp. 1k (64 / 32bit)

   That's not a lot of memory even if you have to maintain two
   separate variants for read and write, but it allows you to get rid
   of the already horribly compiled switch case in apic_read/write and
   you'll get the optional stuff like report_tpr_access() w/o extra
   conditionals just for free.

   An extra goodie is that you can catch any access to a non existing
   register which you now just silently ignore.  And that allows you
   to react on any future hardware oddities without adding a single
   runtime conditional.

   This is stricly x86 and x86 is way better at dealing with indirect
   calls than with the mess gcc creates compiling those switch case
   constructs.

   So I'd go for that and rather spend the memory and the time in
   setting up the function pointers on init/ioctl than dealing with
   the inconsistency of HW/SW representation with magic hacks.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 20:46                   ` Michael S. Tsirkin
@ 2012-05-23 23:02                     ` Thomas Gleixner
  0 siblings, 0 replies; 28+ messages in thread
From: Thomas Gleixner @ 2012-05-23 23:02 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Ingo Molnar, kvm, Marcelo Tosatti, Ingo Molnar,
	H. Peter Anvin, x86, linux-kernel

On Wed, 23 May 2012, Michael S. Tsirkin wrote:

> On Wed, May 23, 2012 at 10:10:27PM +0200, Thomas Gleixner wrote:
> > Replying on a still polite reminder with a sloppy "I just took what's
> > there and implemeted the optimization which I was tasked with" is even
> > more of an offense.
> 
> Ow.  That 'not my fault' line was a joke.

Which I obviously missed to get after spending quite some time to grok
the whole thing.

I still don't get it. Must be a cultural or an age thing or both.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] kvm: optimize ISR lookups
  2012-05-23 22:00             ` Thomas Gleixner
@ 2012-05-30 14:18               ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2012-05-30 14:18 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: H. Peter Anvin, Michael S. Tsirkin, kvm, Marcelo Tosatti,
	Ingo Molnar, x86, linux-kernel

On 05/24/2012 01:00 AM, Thomas Gleixner wrote:

> Thought more about that.
> 
> We have a clear distinction between HW accessed data and software
> accessed data.
> 
> If I look at TPR then it is special cased already and it does:
> 
>    case APIC_TASKPRI:
>                 report_tpr_access(apic, false|true);
>                 /* fall thru */
> 
> And the fall through is using the general accessor for all not special
> cased registers.
> 
> So all you have to do is 
> 
>    case APIC_TASKPRI:
>                 report_tpr_access(apic, false|true);
> +		return access_mapped_reg(...);
> 
> Instead of the fall through.
> 
> So there is no synchronizing back and forth problem simply because you
> already have a special case for that register.
> 
> I know you'll argue that the tpr reporting is a special hack for
> windows guests, at least that's what the changelog tells.
> 
> But even if we have a few more registers accessed by hardware and if
> they do not require a special casing, I really doubt that the overhead
> of special casing those few regs will be worse than not having the
> obvious optimization in place.
> 
> And looking deeper it's a total non issue. The apic mapping is 4k. The
> register stride is strictly 0x10. That makes a total of 256 possible
> registers.
> 
> So now you have two possibilites:
> 
> 1) Create a 256 bit == 64byte bitfield to select the one or the other
>    representation.
> 
>    The overhead of checking the bit is not going to be observable.
> 
> 2) Create a 256 function pointer array == 2k resp. 1k (64 / 32bit)
> 
>    That's not a lot of memory even if you have to maintain two
>    separate variants for read and write, but it allows you to get rid
>    of the already horribly compiled switch case in apic_read/write and
>    you'll get the optional stuff like report_tpr_access() w/o extra
>    conditionals just for free.
> 
>    An extra goodie is that you can catch any access to a non existing
>    register which you now just silently ignore.  And that allows you
>    to react on any future hardware oddities without adding a single
>    runtime conditional.
> 
>    This is stricly x86 and x86 is way better at dealing with indirect
>    calls than with the mess gcc creates compiling those switch case
>    constructs.
> 
>    So I'd go for that and rather spend the memory and the time in
>    setting up the function pointers on init/ioctl than dealing with
>    the inconsistency of HW/SW representation with magic hacks.
> 

I like the bitmap version, it seems very lightweight.  But by itself it
doesn't allow us to use bitmap_weight (or the other bitmap accessors),
unless you assume beforehand that those registers will never be in the
hardware-layout region.

(you also need extra code for copying the APIC state to and from
userspace; right now we just memcpy the APIC page)


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2012-05-30 15:05 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-21 16:37 [PATCH] kvm: optimize ISR lookups Michael S. Tsirkin
2012-05-21 18:44 ` Michael S. Tsirkin
2012-05-21 21:04 ` Thomas Gleixner
2012-05-21 21:51   ` Michael S. Tsirkin
2012-05-21 22:14     ` Thomas Gleixner
2012-05-21 22:24       ` Michael S. Tsirkin
2012-05-21 22:44         ` Thomas Gleixner
2012-05-21 22:50           ` Michael S. Tsirkin
2012-05-21 23:01         ` Thomas Gleixner
2012-05-22 10:46           ` Avi Kivity
2012-05-23 14:48             ` Ingo Molnar
2012-05-23 15:03               ` Avi Kivity
2012-05-23 20:10                 ` Thomas Gleixner
2012-05-23 20:46                   ` Michael S. Tsirkin
2012-05-23 23:02                     ` Thomas Gleixner
2012-05-23 15:14               ` Michael S. Tsirkin
2012-05-23 19:22                 ` H. Peter Anvin
2012-05-21 23:11         ` Thomas Gleixner
2012-05-21 23:06   ` Michael S. Tsirkin
2012-05-21 23:12     ` H. Peter Anvin
2012-05-21 23:36     ` Thomas Gleixner
2012-05-22 10:59   ` Avi Kivity
2012-05-22 17:26     ` Thomas Gleixner
2012-05-23 15:10       ` Avi Kivity
2012-05-23 18:37         ` Thomas Gleixner
2012-05-23 19:25           ` H. Peter Anvin
2012-05-23 22:00             ` Thomas Gleixner
2012-05-30 14:18               ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.