All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
       [not found] <4D512EF7.8040409@siemens.com>
@ 2011-02-08 11:55 ` Jan Kiszka
  2011-02-10 10:16   ` Avi Kivity
  2011-02-15 12:32   ` Marcelo Tosatti
  0 siblings, 2 replies; 16+ messages in thread
From: Jan Kiszka @ 2011-02-08 11:55 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Zachary Amsden

Only for walking the list of VMs, we do not need to hold the preemption
disabling kvm_lock. Convert stat services, the cpufreq callback and
mmu_shrink to RCU. For the latter, special care is required to
synchronize its list_move_tail with kvm_destroy_vm.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/kvm/mmu.c       |   14 +++++++++-----
 arch/x86/kvm/x86.c       |    4 ++--
 include/linux/kvm_host.h |    1 +
 virt/kvm/kvm_main.c      |   17 ++++++++++-------
 4 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b6a9963..e9d0ed8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3587,9 +3587,9 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 	if (nr_to_scan == 0)
 		goto out;
 
-	raw_spin_lock(&kvm_lock);
+	rcu_read_lock();
 
-	list_for_each_entry(kvm, &vm_list, vm_list) {
+	list_for_each_entry_rcu(kvm, &vm_list, vm_list) {
 		int idx, freed_pages;
 		LIST_HEAD(invalid_list);
 
@@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 		spin_unlock(&kvm->mmu_lock);
 		srcu_read_unlock(&kvm->srcu, idx);
 	}
-	if (kvm_freed)
-		list_move_tail(&kvm_freed->vm_list, &vm_list);
+	if (kvm_freed) {
+		raw_spin_lock(&kvm_lock);
+		if (!kvm->deleted)
+			list_move_tail(&kvm_freed->vm_list, &vm_list);
+		raw_spin_unlock(&kvm_lock);
+	}
 
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 
 out:
 	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae6e75b..01fa1ea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4578,7 +4578,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
 	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-	raw_spin_lock(&kvm_lock);
+	rcu_read_lock();
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->cpu != freq->cpu)
@@ -4588,7 +4588,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 				send_ipi = 1;
 		}
 	}
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 
 	if (freq->old < freq->new && send_ipi) {
 		/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c8dee22..9074cac 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -234,6 +234,7 @@ struct kvm {
 #endif
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	atomic_t online_vcpus;
+	bool deleted;
 	struct list_head vm_list;
 	struct mutex lock;
 	struct kvm_io_bus *buses[KVM_NR_BUSES];
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7b70c67..b5a05b5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -546,9 +546,13 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
+
 	raw_spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
+	kvm->deleted = true;
 	raw_spin_unlock(&kvm_lock);
+	synchronize_rcu();
+
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
@@ -2347,10 +2351,10 @@ static int vm_stat_get(void *_offset, u64 *val)
 	struct kvm *kvm;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(kvm, &vm_list, vm_list)
 		*val += *(u32 *)((void *)kvm + offset);
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 	return 0;
 }
 
@@ -2364,12 +2368,11 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	int i;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			*val += *(u32 *)((void *)vcpu + offset);
-
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 	return 0;
 }
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-08 11:55 ` [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU Jan Kiszka
@ 2011-02-10 10:16   ` Avi Kivity
  2011-02-10 11:31     ` Jan Kiszka
  2011-02-15 12:32   ` Marcelo Tosatti
  1 sibling, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 10:16 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 02/08/2011 01:55 PM, Jan Kiszka wrote:
> Only for walking the list of VMs, we do not need to hold the preemption
> disabling kvm_lock. Convert stat services, the cpufreq callback and
> mmu_shrink to RCU. For the latter, special care is required to
> synchronize its list_move_tail with kvm_destroy_vm.
>
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index b6a9963..e9d0ed8 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3587,9 +3587,9 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>   	if (nr_to_scan == 0)
>   		goto out;
>
> -	raw_spin_lock(&kvm_lock);
> +	rcu_read_lock();
>
> -	list_for_each_entry(kvm,&vm_list, vm_list) {
> +	list_for_each_entry_rcu(kvm,&vm_list, vm_list) {
>   		int idx, freed_pages;
>   		LIST_HEAD(invalid_list);

Have to #include rculist.h, and to change all list operations on vm_list 
to rcu variants.

>
> @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>   		spin_unlock(&kvm->mmu_lock);
>   		srcu_read_unlock(&kvm->srcu, idx);
>   	}
> -	if (kvm_freed)
> -		list_move_tail(&kvm_freed->vm_list,&vm_list);
> +	if (kvm_freed) {
> +		raw_spin_lock(&kvm_lock);
> +		if (!kvm->deleted)
> +			list_move_tail(&kvm_freed->vm_list,&vm_list);

There is no list_move_tail_rcu().

Why check kvm->deleted?  it's in the process of being torn down anyway, 
it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.

> +		raw_spin_unlock(&kvm_lock);
> +	}
>
> -	raw_spin_unlock(&kvm_lock);
> +	rcu_read_unlock();
>



-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 10:16   ` Avi Kivity
@ 2011-02-10 11:31     ` Jan Kiszka
  2011-02-10 12:34       ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 11:31 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 2011-02-10 11:16, Avi Kivity wrote:
> On 02/08/2011 01:55 PM, Jan Kiszka wrote:
>> Only for walking the list of VMs, we do not need to hold the preemption
>> disabling kvm_lock. Convert stat services, the cpufreq callback and
>> mmu_shrink to RCU. For the latter, special care is required to
>> synchronize its list_move_tail with kvm_destroy_vm.
>>
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index b6a9963..e9d0ed8 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -3587,9 +3587,9 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>>   	if (nr_to_scan == 0)
>>   		goto out;
>>
>> -	raw_spin_lock(&kvm_lock);
>> +	rcu_read_lock();
>>
>> -	list_for_each_entry(kvm,&vm_list, vm_list) {
>> +	list_for_each_entry_rcu(kvm,&vm_list, vm_list) {
>>   		int idx, freed_pages;
>>   		LIST_HEAD(invalid_list);
> 
> Have to #include rculist.h,

OK.

> and to change all list operations on vm_list 
> to rcu variants.

Not sure if we have such variants for all cases...

> 
>>
>> @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>>   		spin_unlock(&kvm->mmu_lock);
>>   		srcu_read_unlock(&kvm->srcu, idx);
>>   	}
>> -	if (kvm_freed)
>> -		list_move_tail(&kvm_freed->vm_list,&vm_list);
>> +	if (kvm_freed) {
>> +		raw_spin_lock(&kvm_lock);
>> +		if (!kvm->deleted)
>> +			list_move_tail(&kvm_freed->vm_list,&vm_list);
> 
> There is no list_move_tail_rcu().

...specifically not for this one.

> 
> Why check kvm->deleted?  it's in the process of being torn down anyway, 
> it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.

kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
Then mmu_shrink's list_move_tail will re-add that vm to the list tail
again (unless already the removal in move_tail produces a crash).

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 11:31     ` Jan Kiszka
@ 2011-02-10 12:34       ` Avi Kivity
  2011-02-10 12:45         ` Jan Kiszka
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 12:34 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 02/10/2011 01:31 PM, Jan Kiszka wrote:
> >
> >>
> >>  @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
> >>    		spin_unlock(&kvm->mmu_lock);
> >>    		srcu_read_unlock(&kvm->srcu, idx);
> >>    	}
> >>  -	if (kvm_freed)
> >>  -		list_move_tail(&kvm_freed->vm_list,&vm_list);
> >>  +	if (kvm_freed) {
> >>  +		raw_spin_lock(&kvm_lock);
> >>  +		if (!kvm->deleted)
> >>  +			list_move_tail(&kvm_freed->vm_list,&vm_list);
> >
> >  There is no list_move_tail_rcu().
>
> ...specifically not for this one.

Well, we can add one if needed (and if possible).

> >
> >  Why check kvm->deleted?  it's in the process of being torn down anyway,
> >  it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
>
> kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
> Then mmu_shrink's list_move_tail will re-add that vm to the list tail
> again (unless already the removal in move_tail produces a crash).

It's too subtle.  Communication across threads with a variable needs 
memory barriers (even though they're nops on x86) and documentation.

btw, not even sure if it's legal: you have a mutating call within an rcu 
read critical section for the same object.  If synchronize_rcu() were 
called there, would it ever terminate?

(not that synchronize_rcu() is a good thing there, better do it with 
call_rcu()).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 12:34       ` Avi Kivity
@ 2011-02-10 12:45         ` Jan Kiszka
  2011-02-10 12:56           ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 12:45 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 2011-02-10 13:34, Avi Kivity wrote:
> On 02/10/2011 01:31 PM, Jan Kiszka wrote:
>>>
>>>>
>>>>  @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>>>>    		spin_unlock(&kvm->mmu_lock);
>>>>    		srcu_read_unlock(&kvm->srcu, idx);
>>>>    	}
>>>>  -	if (kvm_freed)
>>>>  -		list_move_tail(&kvm_freed->vm_list,&vm_list);
>>>>  +	if (kvm_freed) {
>>>>  +		raw_spin_lock(&kvm_lock);
>>>>  +		if (!kvm->deleted)
>>>>  +			list_move_tail(&kvm_freed->vm_list,&vm_list);
>>>
>>>  There is no list_move_tail_rcu().
>>
>> ...specifically not for this one.
> 
> Well, we can add one if needed (and if possible).

I can have a look, at least at the lower hanging fruits.

> 
>>>
>>>  Why check kvm->deleted?  it's in the process of being torn down anyway,
>>>  it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
>>
>> kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
>> Then mmu_shrink's list_move_tail will re-add that vm to the list tail
>> again (unless already the removal in move_tail produces a crash).
> 
> It's too subtle.  Communication across threads with a variable needs 
> memory barriers (even though they're nops on x86) and documentation.

The barriers are provided by this spin lock we acquire for testing are
modifying deleted.

> 
> btw, not even sure if it's legal: you have a mutating call within an rcu 
> read critical section for the same object.  If synchronize_rcu() were 
> called there, would it ever terminate?

Why not? kvm_destroy_vm is not preventing blocking mmu_shrink to acquire
the kvm_lock where we then find the vm deleted and release both kvm_lock
and the rcu read "lock" afterwards.

> 
> (not that synchronize_rcu() is a good thing there, better do it with 
> call_rcu()).

What's the benefit? The downside is a bit more complexity as you need an
additional callback handler.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 12:45         ` Jan Kiszka
@ 2011-02-10 12:56           ` Avi Kivity
  2011-02-10 12:57             ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 12:56 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 02/10/2011 02:45 PM, Jan Kiszka wrote:
> >>>
> >>>   There is no list_move_tail_rcu().
> >>
> >>  ...specifically not for this one.
> >
> >  Well, we can add one if needed (and if possible).
>
> I can have a look, at least at the lower hanging fruits.

Please keep rcu->parent in the loop.

> >
> >>>
> >>>   Why check kvm->deleted?  it's in the process of being torn down anyway,
> >>>   it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
> >>
> >>  kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
> >>  Then mmu_shrink's list_move_tail will re-add that vm to the list tail
> >>  again (unless already the removal in move_tail produces a crash).
> >
> >  It's too subtle.  Communication across threads with a variable needs
> >  memory barriers (even though they're nops on x86) and documentation.
>
> The barriers are provided by this spin lock we acquire for testing are
> modifying deleted.

Right.

I'm not thrilled with adding ->deleted though.

> >
> >  btw, not even sure if it's legal: you have a mutating call within an rcu
> >  read critical section for the same object.  If synchronize_rcu() were
> >  called there, would it ever terminate?
>
> Why not? kvm_destroy_vm is not preventing blocking mmu_shrink to acquire
> the kvm_lock where we then find the vm deleted and release both kvm_lock
> and the rcu read "lock" afterwards.

synchronize_rcu() waits until all currently running rcu read-side 
critical sections are completed.  But we are in the middle of one, which 
isn't going to complete until it synchronize_rcu() returns.

> >
> >  (not that synchronize_rcu() is a good thing there, better do it with
> >  call_rcu()).
>
> What's the benefit? The downside is a bit more complexity as you need an
> additional callback handler.

synchronize_rcu() can be very slow (its a systemwide operation), and 
mmu_shrink() can be called often on a loaded system.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 12:56           ` Avi Kivity
@ 2011-02-10 12:57             ` Avi Kivity
  2011-02-10 13:14               ` Jan Kiszka
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 12:57 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 02/10/2011 02:56 PM, Avi Kivity wrote:
>> What's the benefit? The downside is a bit more complexity as you need an
>> additional callback handler.
>
>
> synchronize_rcu() can be very slow (its a systemwide operation), and 
> mmu_shrink() can be called often on a loaded system.
>

In fact this just shows that vm_list is not a good candidate for rcu; 
rcu is useful where most operations are reads, but if we discount stats, 
most operations on vm_list are going to be writes.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 12:57             ` Avi Kivity
@ 2011-02-10 13:14               ` Jan Kiszka
  2011-02-10 13:19                 ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 13:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 2011-02-10 13:57, Avi Kivity wrote:
> On 02/10/2011 02:56 PM, Avi Kivity wrote:
>>> What's the benefit? The downside is a bit more complexity as you need an
>>> additional callback handler.
>>
>>
>> synchronize_rcu() can be very slow (its a systemwide operation), and 
>> mmu_shrink() can be called often on a loaded system.
>>
> 
> In fact this just shows that vm_list is not a good candidate for rcu; 
> rcu is useful where most operations are reads, but if we discount stats, 
> most operations on vm_list are going to be writes.

Accept for mmu_shrink, which is write but not delete, thus works without
that slow synchronize_rcu. And I don't see the need for call_rcu in the
vm deletion path.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 13:14               ` Jan Kiszka
@ 2011-02-10 13:19                 ` Avi Kivity
  2011-02-10 13:47                   ` Jan Kiszka
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 13:19 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 02/10/2011 03:14 PM, Jan Kiszka wrote:
> On 2011-02-10 13:57, Avi Kivity wrote:
> >  On 02/10/2011 02:56 PM, Avi Kivity wrote:
> >>>  What's the benefit? The downside is a bit more complexity as you need an
> >>>  additional callback handler.
> >>
> >>
> >>  synchronize_rcu() can be very slow (its a systemwide operation), and
> >>  mmu_shrink() can be called often on a loaded system.
> >>
> >
> >  In fact this just shows that vm_list is not a good candidate for rcu;
> >  rcu is useful where most operations are reads, but if we discount stats,
> >  most operations on vm_list are going to be writes.
>
> Accept for mmu_shrink, which is write but not delete, thus works without
> that slow synchronize_rcu.

I don't really see how you can implement list_move_rcu(), it has to be 
atomic or other users will see a partial vm_list.

>   And I don't see the need for call_rcu in the
> vm deletion path.

synchronize_rcu() is fine for vm destruction.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 13:19                 ` Avi Kivity
@ 2011-02-10 13:47                   ` Jan Kiszka
  2011-02-10 14:26                     ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 13:47 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 2011-02-10 14:19, Avi Kivity wrote:
> On 02/10/2011 03:14 PM, Jan Kiszka wrote:
>> On 2011-02-10 13:57, Avi Kivity wrote:
>>>  On 02/10/2011 02:56 PM, Avi Kivity wrote:
>>>>>  What's the benefit? The downside is a bit more complexity as you need an
>>>>>  additional callback handler.
>>>>
>>>>
>>>>  synchronize_rcu() can be very slow (its a systemwide operation), and
>>>>  mmu_shrink() can be called often on a loaded system.
>>>>
>>>
>>>  In fact this just shows that vm_list is not a good candidate for rcu;
>>>  rcu is useful where most operations are reads, but if we discount stats,
>>>  most operations on vm_list are going to be writes.
>>
>> Accept for mmu_shrink, which is write but not delete, thus works without
>> that slow synchronize_rcu.
> 
> I don't really see how you can implement list_move_rcu(), it has to be 
> atomic or other users will see a partial vm_list.

Right, even if we synchronized that step cleanly, rcu-protected users
could miss the moving vm during concurrent list walks.

What about using a separate mutex for protecting vm_list instead?
Unless I missed some detail, mmu_shrink should allow blocking.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 13:47                   ` Jan Kiszka
@ 2011-02-10 14:26                     ` Avi Kivity
  2011-02-10 14:34                       ` Jan Kiszka
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 14:26 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 02/10/2011 03:47 PM, Jan Kiszka wrote:
> >>
> >>  Accept for mmu_shrink, which is write but not delete, thus works without
> >>  that slow synchronize_rcu.
> >
> >  I don't really see how you can implement list_move_rcu(), it has to be
> >  atomic or other users will see a partial vm_list.
>
> Right, even if we synchronized that step cleanly, rcu-protected users
> could miss the moving vm during concurrent list walks.
>
> What about using a separate mutex for protecting vm_list instead?
> Unless I missed some detail, mmu_shrink should allow blocking.

What else does kvm_lock protect?

I think we could simply reduce the amount of time we hold kvm_lock.  
Pick a vm, ref it, list_move_tail(), unlock, then do the actual 
shrinking.  Of course taking a ref must be done carefully, we might 
already be in kvm_destroy_vm() at that time.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 14:26                     ` Avi Kivity
@ 2011-02-10 14:34                       ` Jan Kiszka
  2011-02-10 14:47                         ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 14:34 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 2011-02-10 15:26, Avi Kivity wrote:
> On 02/10/2011 03:47 PM, Jan Kiszka wrote:
>>>>
>>>>  Accept for mmu_shrink, which is write but not delete, thus works without
>>>>  that slow synchronize_rcu.
>>>
>>>  I don't really see how you can implement list_move_rcu(), it has to be
>>>  atomic or other users will see a partial vm_list.
>>
>> Right, even if we synchronized that step cleanly, rcu-protected users
>> could miss the moving vm during concurrent list walks.
>>
>> What about using a separate mutex for protecting vm_list instead?
>> Unless I missed some detail, mmu_shrink should allow blocking.
> 
> What else does kvm_lock protect?

Someone tried to write a locking.txt and stated that it's also
protecting enabling/disabling hardware virtualization. But that guy may
have overlooked something.

> 
> I think we could simply reduce the amount of time we hold kvm_lock.  
> Pick a vm, ref it, list_move_tail(), unlock, then do the actual 
> shrinking.  Of course taking a ref must be done carefully, we might 
> already be in kvm_destroy_vm() at that time.
> 

Plain mutex held across the whole mmu_shrink loop is still simpler and
should be sufficient - unless we also have to deal with scalability
issues if that handler is able to run concurrently. But based on how we
were using kvm_lock so far...

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 14:34                       ` Jan Kiszka
@ 2011-02-10 14:47                         ` Avi Kivity
  2011-02-10 14:55                           ` Jan Kiszka
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 14:47 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 02/10/2011 04:34 PM, Jan Kiszka wrote:
> On 2011-02-10 15:26, Avi Kivity wrote:
> >  On 02/10/2011 03:47 PM, Jan Kiszka wrote:
> >>>>
> >>>>   Accept for mmu_shrink, which is write but not delete, thus works without
> >>>>   that slow synchronize_rcu.
> >>>
> >>>   I don't really see how you can implement list_move_rcu(), it has to be
> >>>   atomic or other users will see a partial vm_list.
> >>
> >>  Right, even if we synchronized that step cleanly, rcu-protected users
> >>  could miss the moving vm during concurrent list walks.
> >>
> >>  What about using a separate mutex for protecting vm_list instead?
> >>  Unless I missed some detail, mmu_shrink should allow blocking.
> >
> >  What else does kvm_lock protect?
>
> Someone tried to write a locking.txt and stated that it's also
> protecting enabling/disabling hardware virtualization. But that guy may
> have overlooked something.

Right.  I guess splitting that lock makes sense.

> >
> >  I think we could simply reduce the amount of time we hold kvm_lock.
> >  Pick a vm, ref it, list_move_tail(), unlock, then do the actual
> >  shrinking.  Of course taking a ref must be done carefully, we might
> >  already be in kvm_destroy_vm() at that time.
> >
>
> Plain mutex held across the whole mmu_shrink loop is still simpler and
> should be sufficient - unless we also have to deal with scalability
> issues if that handler is able to run concurrently. But based on how we
> were using kvm_lock so far...

I don't think a mutex would work for kvmclock_cpufreq_notifier().  At 
the very least, we'd need a preempt_disable() there.  At the worst, the 
notifier won't like sleeping.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-10 14:47                         ` Avi Kivity
@ 2011-02-10 14:55                           ` Jan Kiszka
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 14:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden

On 2011-02-10 15:47, Avi Kivity wrote:
> On 02/10/2011 04:34 PM, Jan Kiszka wrote:
>> On 2011-02-10 15:26, Avi Kivity wrote:
>>>  On 02/10/2011 03:47 PM, Jan Kiszka wrote:
>>>>>>
>>>>>>   Accept for mmu_shrink, which is write but not delete, thus works without
>>>>>>   that slow synchronize_rcu.
>>>>>
>>>>>   I don't really see how you can implement list_move_rcu(), it has to be
>>>>>   atomic or other users will see a partial vm_list.
>>>>
>>>>  Right, even if we synchronized that step cleanly, rcu-protected users
>>>>  could miss the moving vm during concurrent list walks.
>>>>
>>>>  What about using a separate mutex for protecting vm_list instead?
>>>>  Unless I missed some detail, mmu_shrink should allow blocking.
>>>
>>>  What else does kvm_lock protect?
>>
>> Someone tried to write a locking.txt and stated that it's also
>> protecting enabling/disabling hardware virtualization. But that guy may
>> have overlooked something.
> 
> Right.  I guess splitting that lock makes sense.
> 
>>>
>>>  I think we could simply reduce the amount of time we hold kvm_lock.
>>>  Pick a vm, ref it, list_move_tail(), unlock, then do the actual
>>>  shrinking.  Of course taking a ref must be done carefully, we might
>>>  already be in kvm_destroy_vm() at that time.
>>>
>>
>> Plain mutex held across the whole mmu_shrink loop is still simpler and
>> should be sufficient - unless we also have to deal with scalability
>> issues if that handler is able to run concurrently. But based on how we
>> were using kvm_lock so far...
> 
> I don't think a mutex would work for kvmclock_cpufreq_notifier().  At 
> the very least, we'd need a preempt_disable() there.  At the worst, the 
> notifier won't like sleeping.

Damn, there was that other user. Yes, this means we need to break the
lock in mmu_shrink.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-08 11:55 ` [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU Jan Kiszka
  2011-02-10 10:16   ` Avi Kivity
@ 2011-02-15 12:32   ` Marcelo Tosatti
  2011-02-15 14:08     ` Jan Kiszka
  1 sibling, 1 reply; 16+ messages in thread
From: Marcelo Tosatti @ 2011-02-15 12:32 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Avi Kivity, kvm, Zachary Amsden

On Tue, Feb 08, 2011 at 12:55:39PM +0100, Jan Kiszka wrote:
> Only for walking the list of VMs, we do not need to hold the preemption
> disabling kvm_lock. Convert stat services, the cpufreq callback and
> mmu_shrink to RCU. For the latter, special care is required to
> synchronize its list_move_tail with kvm_destroy_vm.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

Jan,

What is the motivation for this change?


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
  2011-02-15 12:32   ` Marcelo Tosatti
@ 2011-02-15 14:08     ` Jan Kiszka
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Kiszka @ 2011-02-15 14:08 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, kvm, Zachary Amsden

On 2011-02-15 13:32, Marcelo Tosatti wrote:
> On Tue, Feb 08, 2011 at 12:55:39PM +0100, Jan Kiszka wrote:
>> Only for walking the list of VMs, we do not need to hold the preemption
>> disabling kvm_lock. Convert stat services, the cpufreq callback and
>> mmu_shrink to RCU. For the latter, special care is required to
>> synchronize its list_move_tail with kvm_destroy_vm.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Jan,
> 
> What is the motivation for this change?
> 

See http://thread.gmane.org/gmane.linux.kernel/1097043

However, this proposal turned out to be incorrect. The strategy is now
to remove the stats users and reduce the lock-holding time of mmu_shrink
differently.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2011-02-15 14:08 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <4D512EF7.8040409@siemens.com>
2011-02-08 11:55 ` [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU Jan Kiszka
2011-02-10 10:16   ` Avi Kivity
2011-02-10 11:31     ` Jan Kiszka
2011-02-10 12:34       ` Avi Kivity
2011-02-10 12:45         ` Jan Kiszka
2011-02-10 12:56           ` Avi Kivity
2011-02-10 12:57             ` Avi Kivity
2011-02-10 13:14               ` Jan Kiszka
2011-02-10 13:19                 ` Avi Kivity
2011-02-10 13:47                   ` Jan Kiszka
2011-02-10 14:26                     ` Avi Kivity
2011-02-10 14:34                       ` Jan Kiszka
2011-02-10 14:47                         ` Avi Kivity
2011-02-10 14:55                           ` Jan Kiszka
2011-02-15 12:32   ` Marcelo Tosatti
2011-02-15 14:08     ` Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.