* [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
[not found] <4D512EF7.8040409@siemens.com>
@ 2011-02-08 11:55 ` Jan Kiszka
2011-02-10 10:16 ` Avi Kivity
2011-02-15 12:32 ` Marcelo Tosatti
0 siblings, 2 replies; 16+ messages in thread
From: Jan Kiszka @ 2011-02-08 11:55 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Zachary Amsden
Only for walking the list of VMs, we do not need to hold the preemption
disabling kvm_lock. Convert stat services, the cpufreq callback and
mmu_shrink to RCU. For the latter, special care is required to
synchronize its list_move_tail with kvm_destroy_vm.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/mmu.c | 14 +++++++++-----
arch/x86/kvm/x86.c | 4 ++--
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 17 ++++++++++-------
4 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b6a9963..e9d0ed8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3587,9 +3587,9 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
if (nr_to_scan == 0)
goto out;
- raw_spin_lock(&kvm_lock);
+ rcu_read_lock();
- list_for_each_entry(kvm, &vm_list, vm_list) {
+ list_for_each_entry_rcu(kvm, &vm_list, vm_list) {
int idx, freed_pages;
LIST_HEAD(invalid_list);
@@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
- if (kvm_freed)
- list_move_tail(&kvm_freed->vm_list, &vm_list);
+ if (kvm_freed) {
+ raw_spin_lock(&kvm_lock);
+ if (!kvm->deleted)
+ list_move_tail(&kvm_freed->vm_list, &vm_list);
+ raw_spin_unlock(&kvm_lock);
+ }
- raw_spin_unlock(&kvm_lock);
+ rcu_read_unlock();
out:
return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae6e75b..01fa1ea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4578,7 +4578,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- raw_spin_lock(&kvm_lock);
+ rcu_read_lock();
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
@@ -4588,7 +4588,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
send_ipi = 1;
}
}
- raw_spin_unlock(&kvm_lock);
+ rcu_read_unlock();
if (freq->old < freq->new && send_ipi) {
/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c8dee22..9074cac 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -234,6 +234,7 @@ struct kvm {
#endif
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
atomic_t online_vcpus;
+ bool deleted;
struct list_head vm_list;
struct mutex lock;
struct kvm_io_bus *buses[KVM_NR_BUSES];
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7b70c67..b5a05b5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -546,9 +546,13 @@ static void kvm_destroy_vm(struct kvm *kvm)
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
+
raw_spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
+ kvm->deleted = true;
raw_spin_unlock(&kvm_lock);
+ synchronize_rcu();
+
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
@@ -2347,10 +2351,10 @@ static int vm_stat_get(void *_offset, u64 *val)
struct kvm *kvm;
*val = 0;
- raw_spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(kvm, &vm_list, vm_list)
*val += *(u32 *)((void *)kvm + offset);
- raw_spin_unlock(&kvm_lock);
+ rcu_read_unlock();
return 0;
}
@@ -2364,12 +2368,11 @@ static int vcpu_stat_get(void *_offset, u64 *val)
int i;
*val = 0;
- raw_spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
*val += *(u32 *)((void *)vcpu + offset);
-
- raw_spin_unlock(&kvm_lock);
+ rcu_read_unlock();
return 0;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-08 11:55 ` [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU Jan Kiszka
@ 2011-02-10 10:16 ` Avi Kivity
2011-02-10 11:31 ` Jan Kiszka
2011-02-15 12:32 ` Marcelo Tosatti
1 sibling, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 10:16 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 02/08/2011 01:55 PM, Jan Kiszka wrote:
> Only for walking the list of VMs, we do not need to hold the preemption
> disabling kvm_lock. Convert stat services, the cpufreq callback and
> mmu_shrink to RCU. For the latter, special care is required to
> synchronize its list_move_tail with kvm_destroy_vm.
>
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index b6a9963..e9d0ed8 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3587,9 +3587,9 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
> if (nr_to_scan == 0)
> goto out;
>
> - raw_spin_lock(&kvm_lock);
> + rcu_read_lock();
>
> - list_for_each_entry(kvm,&vm_list, vm_list) {
> + list_for_each_entry_rcu(kvm,&vm_list, vm_list) {
> int idx, freed_pages;
> LIST_HEAD(invalid_list);
Have to #include rculist.h, and to change all list operations on vm_list
to rcu variants.
>
> @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
> spin_unlock(&kvm->mmu_lock);
> srcu_read_unlock(&kvm->srcu, idx);
> }
> - if (kvm_freed)
> - list_move_tail(&kvm_freed->vm_list,&vm_list);
> + if (kvm_freed) {
> + raw_spin_lock(&kvm_lock);
> + if (!kvm->deleted)
> + list_move_tail(&kvm_freed->vm_list,&vm_list);
There is no list_move_tail_rcu().
Why check kvm->deleted? it's in the process of being torn down anyway,
it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
> + raw_spin_unlock(&kvm_lock);
> + }
>
> - raw_spin_unlock(&kvm_lock);
> + rcu_read_unlock();
>
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 10:16 ` Avi Kivity
@ 2011-02-10 11:31 ` Jan Kiszka
2011-02-10 12:34 ` Avi Kivity
0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 11:31 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 2011-02-10 11:16, Avi Kivity wrote:
> On 02/08/2011 01:55 PM, Jan Kiszka wrote:
>> Only for walking the list of VMs, we do not need to hold the preemption
>> disabling kvm_lock. Convert stat services, the cpufreq callback and
>> mmu_shrink to RCU. For the latter, special care is required to
>> synchronize its list_move_tail with kvm_destroy_vm.
>>
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index b6a9963..e9d0ed8 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -3587,9 +3587,9 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>> if (nr_to_scan == 0)
>> goto out;
>>
>> - raw_spin_lock(&kvm_lock);
>> + rcu_read_lock();
>>
>> - list_for_each_entry(kvm,&vm_list, vm_list) {
>> + list_for_each_entry_rcu(kvm,&vm_list, vm_list) {
>> int idx, freed_pages;
>> LIST_HEAD(invalid_list);
>
> Have to #include rculist.h,
OK.
> and to change all list operations on vm_list
> to rcu variants.
Not sure if we have such variants for all cases...
>
>>
>> @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>> spin_unlock(&kvm->mmu_lock);
>> srcu_read_unlock(&kvm->srcu, idx);
>> }
>> - if (kvm_freed)
>> - list_move_tail(&kvm_freed->vm_list,&vm_list);
>> + if (kvm_freed) {
>> + raw_spin_lock(&kvm_lock);
>> + if (!kvm->deleted)
>> + list_move_tail(&kvm_freed->vm_list,&vm_list);
>
> There is no list_move_tail_rcu().
...specifically not for this one.
>
> Why check kvm->deleted? it's in the process of being torn down anyway,
> it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
Then mmu_shrink's list_move_tail will re-add that vm to the list tail
again (unless already the removal in move_tail produces a crash).
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 11:31 ` Jan Kiszka
@ 2011-02-10 12:34 ` Avi Kivity
2011-02-10 12:45 ` Jan Kiszka
0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 12:34 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 02/10/2011 01:31 PM, Jan Kiszka wrote:
> >
> >>
> >> @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
> >> spin_unlock(&kvm->mmu_lock);
> >> srcu_read_unlock(&kvm->srcu, idx);
> >> }
> >> - if (kvm_freed)
> >> - list_move_tail(&kvm_freed->vm_list,&vm_list);
> >> + if (kvm_freed) {
> >> + raw_spin_lock(&kvm_lock);
> >> + if (!kvm->deleted)
> >> + list_move_tail(&kvm_freed->vm_list,&vm_list);
> >
> > There is no list_move_tail_rcu().
>
> ...specifically not for this one.
Well, we can add one if needed (and if possible).
> >
> > Why check kvm->deleted? it's in the process of being torn down anyway,
> > it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
>
> kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
> Then mmu_shrink's list_move_tail will re-add that vm to the list tail
> again (unless already the removal in move_tail produces a crash).
It's too subtle. Communication across threads with a variable needs
memory barriers (even though they're nops on x86) and documentation.
btw, not even sure if it's legal: you have a mutating call within an rcu
read critical section for the same object. If synchronize_rcu() were
called there, would it ever terminate?
(not that synchronize_rcu() is a good thing there, better do it with
call_rcu()).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 12:34 ` Avi Kivity
@ 2011-02-10 12:45 ` Jan Kiszka
2011-02-10 12:56 ` Avi Kivity
0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 12:45 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 2011-02-10 13:34, Avi Kivity wrote:
> On 02/10/2011 01:31 PM, Jan Kiszka wrote:
>>>
>>>>
>>>> @@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
>>>> spin_unlock(&kvm->mmu_lock);
>>>> srcu_read_unlock(&kvm->srcu, idx);
>>>> }
>>>> - if (kvm_freed)
>>>> - list_move_tail(&kvm_freed->vm_list,&vm_list);
>>>> + if (kvm_freed) {
>>>> + raw_spin_lock(&kvm_lock);
>>>> + if (!kvm->deleted)
>>>> + list_move_tail(&kvm_freed->vm_list,&vm_list);
>>>
>>> There is no list_move_tail_rcu().
>>
>> ...specifically not for this one.
>
> Well, we can add one if needed (and if possible).
I can have a look, at least at the lower hanging fruits.
>
>>>
>>> Why check kvm->deleted? it's in the process of being torn down anyway,
>>> it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
>>
>> kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
>> Then mmu_shrink's list_move_tail will re-add that vm to the list tail
>> again (unless already the removal in move_tail produces a crash).
>
> It's too subtle. Communication across threads with a variable needs
> memory barriers (even though they're nops on x86) and documentation.
The barriers are provided by this spin lock we acquire for testing or
modifying deleted.
>
> btw, not even sure if it's legal: you have a mutating call within an rcu
> read critical section for the same object. If synchronize_rcu() were
> called there, would it ever terminate?
Why not? kvm_destroy_vm does not prevent a blocked mmu_shrink from acquiring
the kvm_lock, where we then find the vm deleted and release both kvm_lock
and the rcu read "lock" afterwards.
>
> (not that synchronize_rcu() is a good thing there, better do it with
> call_rcu()).
What's the benefit? The downside is a bit more complexity as you need an
additional callback handler.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 12:45 ` Jan Kiszka
@ 2011-02-10 12:56 ` Avi Kivity
2011-02-10 12:57 ` Avi Kivity
0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 12:56 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 02/10/2011 02:45 PM, Jan Kiszka wrote:
> >>>
> >>> There is no list_move_tail_rcu().
> >>
> >> ...specifically not for this one.
> >
> > Well, we can add one if needed (and if possible).
>
> I can have a look, at least at the lower hanging fruits.
Please keep rcu->parent in the loop.
> >
> >>>
> >>> Why check kvm->deleted? it's in the process of being torn down anyway,
> >>> it doesn't matter if mmu_shrink or kvm_destroy_vm pulls the trigger.
> >>
> >> kvm_destroy_vm removes a vm from the list while mmu_shrink is running.
> >> Then mmu_shrink's list_move_tail will re-add that vm to the list tail
> >> again (unless already the removal in move_tail produces a crash).
> >
> > It's too subtle. Communication across threads with a variable needs
> > memory barriers (even though they're nops on x86) and documentation.
>
> The barriers are provided by this spin lock we acquire for testing are
> modifying deleted.
Right.
I'm not thrilled with adding ->deleted though.
> >
> > btw, not even sure if it's legal: you have a mutating call within an rcu
> > read critical section for the same object. If synchronize_rcu() were
> > called there, would it ever terminate?
>
> Why not? kvm_destroy_vm is not preventing blocking mmu_shrink to acquire
> the kvm_lock where we then find the vm deleted and release both kvm_lock
> and the rcu read "lock" afterwards.
synchronize_rcu() waits until all currently running rcu read-side
critical sections are completed. But we are in the middle of one, which
isn't going to complete until it synchronize_rcu() returns.
> >
> > (not that synchronize_rcu() is a good thing there, better do it with
> > call_rcu()).
>
> What's the benefit? The downside is a bit more complexity as you need an
> additional callback handler.
synchronize_rcu() can be very slow (its a systemwide operation), and
mmu_shrink() can be called often on a loaded system.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 12:56 ` Avi Kivity
@ 2011-02-10 12:57 ` Avi Kivity
2011-02-10 13:14 ` Jan Kiszka
0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 12:57 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 02/10/2011 02:56 PM, Avi Kivity wrote:
>> What's the benefit? The downside is a bit more complexity as you need an
>> additional callback handler.
>
>
> synchronize_rcu() can be very slow (its a systemwide operation), and
> mmu_shrink() can be called often on a loaded system.
>
In fact this just shows that vm_list is not a good candidate for rcu;
rcu is useful where most operations are reads, but if we discount stats,
most operations on vm_list are going to be writes.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 12:57 ` Avi Kivity
@ 2011-02-10 13:14 ` Jan Kiszka
2011-02-10 13:19 ` Avi Kivity
0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 13:14 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 2011-02-10 13:57, Avi Kivity wrote:
> On 02/10/2011 02:56 PM, Avi Kivity wrote:
>>> What's the benefit? The downside is a bit more complexity as you need an
>>> additional callback handler.
>>
>>
>> synchronize_rcu() can be very slow (its a systemwide operation), and
>> mmu_shrink() can be called often on a loaded system.
>>
>
> In fact this just shows that vm_list is not a good candidate for rcu;
> rcu is useful where most operations are reads, but if we discount stats,
> most operations on vm_list are going to be writes.
Except for mmu_shrink, which is write but not delete, thus works without
that slow synchronize_rcu. And I don't see the need for call_rcu in the
vm deletion path.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 13:14 ` Jan Kiszka
@ 2011-02-10 13:19 ` Avi Kivity
2011-02-10 13:47 ` Jan Kiszka
0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 13:19 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 02/10/2011 03:14 PM, Jan Kiszka wrote:
> On 2011-02-10 13:57, Avi Kivity wrote:
> > On 02/10/2011 02:56 PM, Avi Kivity wrote:
> >>> What's the benefit? The downside is a bit more complexity as you need an
> >>> additional callback handler.
> >>
> >>
> >> synchronize_rcu() can be very slow (its a systemwide operation), and
> >> mmu_shrink() can be called often on a loaded system.
> >>
> >
> > In fact this just shows that vm_list is not a good candidate for rcu;
> > rcu is useful where most operations are reads, but if we discount stats,
> > most operations on vm_list are going to be writes.
>
> Accept for mmu_shrink, which is write but not delete, thus works without
> that slow synchronize_rcu.
I don't really see how you can implement list_move_rcu(), it has to be
atomic or other users will see a partial vm_list.
> And I don't see the need for call_rcu in the
> vm deletion path.
synchronize_rcu() is fine for vm destruction.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 13:19 ` Avi Kivity
@ 2011-02-10 13:47 ` Jan Kiszka
2011-02-10 14:26 ` Avi Kivity
0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 13:47 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 2011-02-10 14:19, Avi Kivity wrote:
> On 02/10/2011 03:14 PM, Jan Kiszka wrote:
>> On 2011-02-10 13:57, Avi Kivity wrote:
>>> On 02/10/2011 02:56 PM, Avi Kivity wrote:
>>>>> What's the benefit? The downside is a bit more complexity as you need an
>>>>> additional callback handler.
>>>>
>>>>
>>>> synchronize_rcu() can be very slow (its a systemwide operation), and
>>>> mmu_shrink() can be called often on a loaded system.
>>>>
>>>
>>> In fact this just shows that vm_list is not a good candidate for rcu;
>>> rcu is useful where most operations are reads, but if we discount stats,
>>> most operations on vm_list are going to be writes.
>>
>> Accept for mmu_shrink, which is write but not delete, thus works without
>> that slow synchronize_rcu.
>
> I don't really see how you can implement list_move_rcu(), it has to be
> atomic or other users will see a partial vm_list.
Right, even if we synchronized that step cleanly, rcu-protected users
could miss the moving vm during concurrent list walks.
What about using a separate mutex for protecting vm_list instead?
Unless I missed some detail, mmu_shrink should allow blocking.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 13:47 ` Jan Kiszka
@ 2011-02-10 14:26 ` Avi Kivity
2011-02-10 14:34 ` Jan Kiszka
0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 14:26 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 02/10/2011 03:47 PM, Jan Kiszka wrote:
> >>
> >> Accept for mmu_shrink, which is write but not delete, thus works without
> >> that slow synchronize_rcu.
> >
> > I don't really see how you can implement list_move_rcu(), it has to be
> > atomic or other users will see a partial vm_list.
>
> Right, even if we synchronized that step cleanly, rcu-protected users
> could miss the moving vm during concurrent list walks.
>
> What about using a separate mutex for protecting vm_list instead?
> Unless I missed some detail, mmu_shrink should allow blocking.
What else does kvm_lock protect?
I think we could simply reduce the amount of time we hold kvm_lock.
Pick a vm, ref it, list_move_tail(), unlock, then do the actual
shrinking. Of course taking a ref must be done carefully, we might
already be in kvm_destroy_vm() at that time.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 14:26 ` Avi Kivity
@ 2011-02-10 14:34 ` Jan Kiszka
2011-02-10 14:47 ` Avi Kivity
0 siblings, 1 reply; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 14:34 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 2011-02-10 15:26, Avi Kivity wrote:
> On 02/10/2011 03:47 PM, Jan Kiszka wrote:
>>>>
>>>> Accept for mmu_shrink, which is write but not delete, thus works without
>>>> that slow synchronize_rcu.
>>>
>>> I don't really see how you can implement list_move_rcu(), it has to be
>>> atomic or other users will see a partial vm_list.
>>
>> Right, even if we synchronized that step cleanly, rcu-protected users
>> could miss the moving vm during concurrent list walks.
>>
>> What about using a separate mutex for protecting vm_list instead?
>> Unless I missed some detail, mmu_shrink should allow blocking.
>
> What else does kvm_lock protect?
Someone tried to write a locking.txt and stated that it's also
protecting enabling/disabling hardware virtualization. But that guy may
have overlooked something.
>
> I think we could simply reduce the amount of time we hold kvm_lock.
> Pick a vm, ref it, list_move_tail(), unlock, then do the actual
> shrinking. Of course taking a ref must be done carefully, we might
> already be in kvm_destroy_vm() at that time.
>
Plain mutex held across the whole mmu_shrink loop is still simpler and
should be sufficient - unless we also have to deal with scalability
issues if that handler is able to run concurrently. But based on how we
were using kvm_lock so far...
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 14:34 ` Jan Kiszka
@ 2011-02-10 14:47 ` Avi Kivity
2011-02-10 14:55 ` Jan Kiszka
0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2011-02-10 14:47 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 02/10/2011 04:34 PM, Jan Kiszka wrote:
> On 2011-02-10 15:26, Avi Kivity wrote:
> > On 02/10/2011 03:47 PM, Jan Kiszka wrote:
> >>>>
> >>>> Accept for mmu_shrink, which is write but not delete, thus works without
> >>>> that slow synchronize_rcu.
> >>>
> >>> I don't really see how you can implement list_move_rcu(), it has to be
> >>> atomic or other users will see a partial vm_list.
> >>
> >> Right, even if we synchronized that step cleanly, rcu-protected users
> >> could miss the moving vm during concurrent list walks.
> >>
> >> What about using a separate mutex for protecting vm_list instead?
> >> Unless I missed some detail, mmu_shrink should allow blocking.
> >
> > What else does kvm_lock protect?
>
> Someone tried to write a locking.txt and stated that it's also
> protecting enabling/disabling hardware virtualization. But that guy may
> have overlooked something.
Right. I guess splitting that lock makes sense.
> >
> > I think we could simply reduce the amount of time we hold kvm_lock.
> > Pick a vm, ref it, list_move_tail(), unlock, then do the actual
> > shrinking. Of course taking a ref must be done carefully, we might
> > already be in kvm_destroy_vm() at that time.
> >
>
> Plain mutex held across the whole mmu_shrink loop is still simpler and
> should be sufficient - unless we also have to deal with scalability
> issues if that handler is able to run concurrently. But based on how we
> were using kvm_lock so far...
I don't think a mutex would work for kvmclock_cpufreq_notifier(). At
the very least, we'd need a preempt_disable() there. At the worst, the
notifier won't like sleeping.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-10 14:47 ` Avi Kivity
@ 2011-02-10 14:55 ` Jan Kiszka
0 siblings, 0 replies; 16+ messages in thread
From: Jan Kiszka @ 2011-02-10 14:55 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Zachary Amsden
On 2011-02-10 15:47, Avi Kivity wrote:
> On 02/10/2011 04:34 PM, Jan Kiszka wrote:
>> On 2011-02-10 15:26, Avi Kivity wrote:
>>> On 02/10/2011 03:47 PM, Jan Kiszka wrote:
>>>>>>
>>>>>> Accept for mmu_shrink, which is write but not delete, thus works without
>>>>>> that slow synchronize_rcu.
>>>>>
>>>>> I don't really see how you can implement list_move_rcu(), it has to be
>>>>> atomic or other users will see a partial vm_list.
>>>>
>>>> Right, even if we synchronized that step cleanly, rcu-protected users
>>>> could miss the moving vm during concurrent list walks.
>>>>
>>>> What about using a separate mutex for protecting vm_list instead?
>>>> Unless I missed some detail, mmu_shrink should allow blocking.
>>>
>>> What else does kvm_lock protect?
>>
>> Someone tried to write a locking.txt and stated that it's also
>> protecting enabling/disabling hardware virtualization. But that guy may
>> have overlooked something.
>
> Right. I guess splitting that lock makes sense.
>
>>>
>>> I think we could simply reduce the amount of time we hold kvm_lock.
>>> Pick a vm, ref it, list_move_tail(), unlock, then do the actual
>>> shrinking. Of course taking a ref must be done carefully, we might
>>> already be in kvm_destroy_vm() at that time.
>>>
>>
>> Plain mutex held across the whole mmu_shrink loop is still simpler and
>> should be sufficient - unless we also have to deal with scalability
>> issues if that handler is able to run concurrently. But based on how we
>> were using kvm_lock so far...
>
> I don't think a mutex would work for kvmclock_cpufreq_notifier(). At
> the very least, we'd need a preempt_disable() there. At the worst, the
> notifier won't like sleeping.
Damn, there was that other user. Yes, this means we need to break the
lock in mmu_shrink.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-08 11:55 ` [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU Jan Kiszka
2011-02-10 10:16 ` Avi Kivity
@ 2011-02-15 12:32 ` Marcelo Tosatti
2011-02-15 14:08 ` Jan Kiszka
1 sibling, 1 reply; 16+ messages in thread
From: Marcelo Tosatti @ 2011-02-15 12:32 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Avi Kivity, kvm, Zachary Amsden
On Tue, Feb 08, 2011 at 12:55:39PM +0100, Jan Kiszka wrote:
> Only for walking the list of VMs, we do not need to hold the preemption
> disabling kvm_lock. Convert stat services, the cpufreq callback and
> mmu_shrink to RCU. For the latter, special care is required to
> synchronize its list_move_tail with kvm_destroy_vm.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Jan,
What is the motivation for this change?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU
2011-02-15 12:32 ` Marcelo Tosatti
@ 2011-02-15 14:08 ` Jan Kiszka
0 siblings, 0 replies; 16+ messages in thread
From: Jan Kiszka @ 2011-02-15 14:08 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Avi Kivity, kvm, Zachary Amsden
On 2011-02-15 13:32, Marcelo Tosatti wrote:
> On Tue, Feb 08, 2011 at 12:55:39PM +0100, Jan Kiszka wrote:
>> Only for walking the list of VMs, we do not need to hold the preemption
>> disabling kvm_lock. Convert stat services, the cpufreq callback and
>> mmu_shrink to RCU. For the latter, special care is required to
>> synchronize its list_move_tail with kvm_destroy_vm.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>
> Jan,
>
> What is the motivation for this change?
>
See http://thread.gmane.org/gmane.linux.kernel/1097043
However, this proposal turned out to be incorrect. The strategy is now
to remove the stats users and reduce the lock-holding time of mmu_shrink
differently.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2011-02-15 14:08 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <4D512EF7.8040409@siemens.com>
2011-02-08 11:55 ` [PATCH 2/2] KVM: Convert read-only users of vm_list to RCU Jan Kiszka
2011-02-10 10:16 ` Avi Kivity
2011-02-10 11:31 ` Jan Kiszka
2011-02-10 12:34 ` Avi Kivity
2011-02-10 12:45 ` Jan Kiszka
2011-02-10 12:56 ` Avi Kivity
2011-02-10 12:57 ` Avi Kivity
2011-02-10 13:14 ` Jan Kiszka
2011-02-10 13:19 ` Avi Kivity
2011-02-10 13:47 ` Jan Kiszka
2011-02-10 14:26 ` Avi Kivity
2011-02-10 14:34 ` Jan Kiszka
2011-02-10 14:47 ` Avi Kivity
2011-02-10 14:55 ` Jan Kiszka
2011-02-15 12:32 ` Marcelo Tosatti
2011-02-15 14:08 ` Jan Kiszka
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.