kvm.vger.kernel.org archive mirror
* [PATCH 0/2][RFC] Scheduler preemption hooks, #2
@ 2007-07-11 15:55 Avi Kivity
       [not found] ` <11841693332609-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2007-07-11 15:55 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA

I redid the scheduler preemption patches based on Rusty's feedback.
Things do look much cleaner this time around.

Changes:
- split into core and kvm parts
- incorporated Shaohua Li's fix
- each task now has a list of independently managed preemption hooks
- moved the preempt hook list to a hot cache line within task_struct

I haven't made kernel_fpu_begin()/kernel_fpu_end() preemptible as promised,
because (a) we need to find a place to put the fpu state, and (b) we should
get better performance by reusing cr0.ts (reloading the fpu state lazily on
first use) anyway.



* [PATCH 1/2] SCHED: Generic hooks for trapping task preemption
       [not found] ` <11841693332609-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-11 15:55   ` Avi Kivity
       [not found]     ` <11841693333159-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-07-11 15:55   ` [PATCH 2/2] KVM: Use the scheduler preemption hooks to make kvm preemptible Avi Kivity
  1 sibling, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2007-07-11 15:55 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA

This adds a general mechanism whereby a task can request the scheduler to
notify it whenever it is preempted or scheduled back in.  This allows the
task to swap out and back in any special-purpose register state, such as
the fpu or Intel's VT registers.
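
For illustration, a minimal sketch of a client of this interface (the my_*
names are made up; the calls and callbacks are the ones this patch
introduces):

	struct my_state {
		struct preempt_hook hook;
		/* ... special-purpose register state ... */
	};

	static void my_sched_in(struct preempt_hook *hook, int cpu)
	{
		struct my_state *s = container_of(hook, struct my_state, hook);

		/* current was scheduled back in on 'cpu'; reload s's registers */
	}

	static void my_sched_out(struct preempt_hook *hook)
	{
		struct my_state *s = container_of(hook, struct my_state, hook);

		/* current is about to be preempted; save s's registers */
	}

	static struct preempt_ops my_ops = {
		.sched_in  = my_sched_in,
		.sched_out = my_sched_out,
	};

	static void my_use_registers(struct my_state *s)
	{
		preempt_hook_init(&s->hook, &my_ops);
		preempt_hook_register(&s->hook);
		/* ... use the registers; preemption is now safe ... */
		preempt_hook_unregister(&s->hook);
	}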

Signed-off-by: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
---
 include/linux/preempt.h |   27 +++++++++++++++++++++++
 include/linux/sched.h   |    4 +++
 kernel/Kconfig.preempt  |    4 +++
 kernel/sched.c          |   54 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 0 deletions(-)

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index d0926d6..376fb57 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
   extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,30 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_HOOKS
+
+struct preempt_hook;
+
+struct preempt_ops {
+	void (*sched_in)(struct preempt_hook *hook, int cpu);
+	void (*sched_out)(struct preempt_hook *hook);
+};
+
+struct preempt_hook {
+	struct hlist_node link;
+	struct preempt_ops *ops;
+};
+
+void preempt_hook_register(struct preempt_hook *hook);
+void preempt_hook_unregister(struct preempt_hook *hook);
+
+static inline void preempt_hook_init(struct preempt_hook *hook,
+				     struct preempt_ops *ops)
+{
+	INIT_HLIST_NODE(&hook->link);
+	hook->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cfb6805..a5f3d35 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -908,6 +908,10 @@ struct task_struct {
 	struct sched_class *sched_class;
 	struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_HOOKS
+	struct hlist_head preempt_hooks;  /* list of struct preempt_hook */
+#endif
+
 	unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c64ce9c..d45df49 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,3 +63,7 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config PREEMPT_HOOKS
+	bool
+	depends on X86
+	default y
diff --git a/kernel/sched.c b/kernel/sched.c
index 9fbced6..080ab35 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1566,6 +1566,10 @@ static void __sched_fork(struct task_struct *p)
 	INIT_LIST_HEAD(&p->run_list);
 	p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_HOOKS
+	INIT_HLIST_HEAD(&p->preempt_hooks);
+#endif
+
 	/*
 	 * We mark the process as running here, but have not actually
 	 * inserted it onto the runqueue yet. This guarantees that
@@ -1647,6 +1651,50 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_HOOKS
+
+void preempt_hook_register(struct preempt_hook *hook)
+{
+	hlist_add_head(&hook->link, &current->preempt_hooks);
+}
+EXPORT_SYMBOL_GPL(preempt_hook_register);
+
+void preempt_hook_unregister(struct preempt_hook *hook)
+{
+	hlist_del(&hook->link);
+}
+EXPORT_SYMBOL_GPL(preempt_hook_unregister);
+
+static void fire_sched_in_preempt_hooks(struct task_struct *tsk)
+{
+	struct preempt_hook *hook;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(hook, node, &tsk->preempt_hooks, link)
+		hook->ops->sched_in(hook, raw_smp_processor_id());
+}
+
+static void fire_sched_out_preempt_hooks(struct task_struct *tsk)
+{
+	struct preempt_hook *hook;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(hook, node, &tsk->preempt_hooks, link)
+		hook->ops->sched_out(hook);
+}
+
+#else
+
+static void fire_sched_in_preempt_hooks(struct task_struct *tsk)
+{
+}
+
+static void fire_sched_out_preempt_hooks(struct task_struct *tsk)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1661,6 +1709,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  */
 static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
 {
+	fire_sched_out_preempt_hooks(current);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -1702,6 +1751,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	fire_sched_in_preempt_hooks(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
@@ -6314,6 +6364,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_HOOKS
+	INIT_HLIST_HEAD(&init_task.preempt_hooks);
+#endif
+
 #ifdef CONFIG_SMP
 	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
-- 
1.5.2.3



* [PATCH 2/2] KVM: Use the scheduler preemption hooks to make kvm preemptible
       [not found] ` <11841693332609-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-07-11 15:55   ` [PATCH 1/2] SCHED: Generic hooks for trapping task preemption Avi Kivity
@ 2007-07-11 15:55   ` Avi Kivity
       [not found]     ` <1184169333786-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  1 sibling, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2007-07-11 15:55 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA

Current kvm disables preemption while the new virtualization registers are
in use.  This of course is not very good for latency-sensitive workloads
(one use of virtualization is to offload user interface and other
latency-insensitive stuff to a container, so that it is easier to analyze
the remaining workload).  This patch re-enables preemption for kvm;
preemption is now only disabled when switching the registers in and out,
and during the switch to guest mode and back.
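
The usage pattern, condensed from the vcpu_load()/vcpu_put() changes in the
diff below (kvm_arch_ops and the preempt hook field are as in this patch):

	mutex_lock(&vcpu->mutex);
	cpu = get_cpu();				/* disables preemption */
	preempt_hook_register(&vcpu->preempt_hook);
	kvm_arch_ops->vcpu_load(vcpu, cpu);		/* load VT/SVM state */
	put_cpu();					/* preemption safe again */

	/* ... run the guest; the hooks re-run load/put on every switch ... */

	preempt_disable();
	kvm_arch_ops->vcpu_put(vcpu);			/* save VT/SVM state */
	preempt_hook_unregister(&vcpu->preempt_hook);
	preempt_enable();
	mutex_unlock(&vcpu->mutex);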

Contains fixes from Shaohua Li <shaohua.li-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>.

Signed-off-by: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
---
 drivers/kvm/Kconfig    |    2 +-
 drivers/kvm/kvm.h      |    4 ++-
 drivers/kvm/kvm_main.c |   52 ++++++++++++++++++++++++++++++++++++++++-------
 drivers/kvm/mmu.c      |    2 -
 drivers/kvm/svm.c      |    6 +---
 drivers/kvm/vmx.c      |   17 ++++++++-------
 6 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index 33fa28a..92644b5 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -10,7 +10,7 @@ if VIRTUALIZATION
 
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on X86 && EXPERIMENTAL
+	depends on X86 && EXPERIMENTAL && PREEMPT_HOOKS
 	depends on X86_CMPXCHG64 || 64BIT
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 65ab268..f5e9741 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/preempt.h>
 #include <asm/signal.h>
 
 #include "vmx.h"
@@ -328,6 +329,7 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 struct kvm_vcpu {
 	struct kvm *kvm;
+	struct preempt_hook preempt_hook;
 	union {
 		struct vmcs *vmcs;
 		struct vcpu_svm *svm;
@@ -474,7 +476,7 @@ struct kvm_arch_ops {
 	int (*vcpu_create)(struct kvm_vcpu *vcpu);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
 
-	void (*vcpu_load)(struct kvm_vcpu *vcpu);
+	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 	void (*vcpu_decache)(struct kvm_vcpu *vcpu);
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index a4429eb..5ca4ed5 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -54,6 +54,8 @@ static cpumask_t cpus_hardware_enabled;
 
 struct kvm_arch_ops *kvm_arch_ops;
 
+static __read_mostly struct preempt_ops kvm_preempt_ops;
+
 static void hardware_disable(void *ignored);
 
 #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
@@ -234,8 +236,13 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
  */
 static void vcpu_load(struct kvm_vcpu *vcpu)
 {
+	int cpu;
+
 	mutex_lock(&vcpu->mutex);
-	kvm_arch_ops->vcpu_load(vcpu);
+	cpu = get_cpu();
+	preempt_hook_register(&vcpu->preempt_hook);
+	kvm_arch_ops->vcpu_load(vcpu, cpu);
+	put_cpu();
 }
 
 /*
@@ -245,19 +252,26 @@ static void vcpu_load(struct kvm_vcpu *vcpu)
 static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
 {
 	struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
+	int cpu;
 
 	mutex_lock(&vcpu->mutex);
 	if (!vcpu->vmcs) {
 		mutex_unlock(&vcpu->mutex);
 		return NULL;
 	}
-	kvm_arch_ops->vcpu_load(vcpu);
+	cpu = get_cpu();
+	preempt_hook_register(&vcpu->preempt_hook);
+	kvm_arch_ops->vcpu_load(vcpu, cpu);
+	put_cpu();
 	return vcpu;
 }
 
 static void vcpu_put(struct kvm_vcpu *vcpu)
 {
+	preempt_disable();
 	kvm_arch_ops->vcpu_put(vcpu);
+	preempt_hook_unregister(&vcpu->preempt_hook);
+	preempt_enable();
 	mutex_unlock(&vcpu->mutex);
 }
 
@@ -1654,9 +1668,7 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 {
 	if (!need_resched())
 		return;
-	vcpu_put(vcpu);
 	cond_resched();
-	vcpu_load(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
@@ -1722,11 +1734,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
 	unsigned bytes;
 	int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
 
-	kvm_arch_ops->vcpu_put(vcpu);
 	q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
 		 PAGE_KERNEL);
 	if (!q) {
-		kvm_arch_ops->vcpu_load(vcpu);
 		free_pio_guest_pages(vcpu);
 		return -ENOMEM;
 	}
@@ -1738,7 +1748,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
 		memcpy(p, q, bytes);
 	q -= vcpu->pio.guest_page_offset;
 	vunmap(q);
-	kvm_arch_ops->vcpu_load(vcpu);
 	free_pio_guest_pages(vcpu);
 	return 0;
 }
@@ -2381,6 +2390,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	int r;
 	struct kvm_vcpu *vcpu;
 	struct page *page;
+	int cpu;
 
 	r = -EINVAL;
 	if (!valid_vcpu(n))
@@ -2420,7 +2430,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r < 0)
 		goto out_free_vcpus;
 
-	kvm_arch_ops->vcpu_load(vcpu);
+	vcpu->preempt_hook.ops = &kvm_preempt_ops;
+	cpu = get_cpu();
+	preempt_hook_register(&vcpu->preempt_hook);
+	kvm_arch_ops->vcpu_load(vcpu, cpu);
+	put_cpu();
 	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
 		r = kvm_arch_ops->vcpu_setup(vcpu);
@@ -3137,6 +3151,25 @@ static struct sys_device kvm_sysdev = {
 
 hpa_t bad_page_address;
 
+static inline struct kvm_vcpu *preempt_hook_to_vcpu(struct preempt_hook *hook)
+{
+	return container_of(hook, struct kvm_vcpu, preempt_hook);
+}
+
+static void kvm_sched_in(struct preempt_hook *hook, int cpu)
+{
+	struct kvm_vcpu *vcpu = preempt_hook_to_vcpu(hook);
+
+	kvm_arch_ops->vcpu_load(vcpu, cpu);
+}
+
+static void kvm_sched_out(struct preempt_hook *hook)
+{
+	struct kvm_vcpu *vcpu = preempt_hook_to_vcpu(hook);
+
+	kvm_arch_ops->vcpu_put(vcpu);
+}
+
 int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 {
 	int r;
@@ -3183,6 +3216,9 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 		goto out_free;
 	}
 
+	kvm_preempt_ops.sched_in = kvm_sched_in;
+	kvm_preempt_ops.sched_out = kvm_sched_out;
+
 	return r;
 
 out_free:
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index b297a6b..f60aa87 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -254,9 +254,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 	r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
 	if (r < 0) {
 		spin_unlock(&vcpu->kvm->lock);
-		kvm_arch_ops->vcpu_put(vcpu);
 		r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
-		kvm_arch_ops->vcpu_load(vcpu);
 		spin_lock(&vcpu->kvm->lock);
 	}
 	return r;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index bc818cc..20b4dc8 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -611,11 +611,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	kfree(vcpu->svm);
 }
 
-static void svm_vcpu_load(struct kvm_vcpu *vcpu)
+static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int cpu, i;
+	int i;
 
-	cpu = get_cpu();
 	if (unlikely(cpu != vcpu->cpu)) {
 		u64 tsc_this, delta;
 
@@ -641,7 +640,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 		wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]);
 
 	rdtscll(vcpu->host_tsc);
-	put_cpu();
 }
 
 static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 80628f6..c9f5c1b 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -345,6 +345,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	unsigned long flags;
 
 	if (!hs->loaded)
 		return;
@@ -357,12 +358,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 		 * If we have to reload gs, we must take care to
 		 * preserve our gs base.
 		 */
-		local_irq_disable();
+		local_irq_save(flags);
 		load_gs(hs->gs_sel);
 #ifdef CONFIG_X86_64
 		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
 #endif
-		local_irq_enable();
+		local_irq_restore(flags);
 
 		reload_tss();
 	}
@@ -376,14 +377,11 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
  */
-static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	u64 phys_addr = __pa(vcpu->vmcs);
-	int cpu;
 	u64 tsc_this, delta;
 
-	cpu = get_cpu();
-
 	if (vcpu->cpu != cpu)
 		vcpu_clear(vcpu);
 
@@ -428,7 +426,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	vmx_load_host_state(vcpu);
 	kvm_put_guest_fpu(vcpu);
-	put_cpu();
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -2000,6 +1997,8 @@ preempted:
 		kvm_guest_debug_pre(vcpu);
 
 again:
+	preempt_disable();
+
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
@@ -2146,6 +2145,9 @@ again:
 	vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
+	vcpu->launched = 1;
+
+	preempt_enable();
 
 	if (unlikely(fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2160,7 +2162,6 @@ again:
 	if (unlikely(prof_on == KVM_PROFILING))
 		profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
 
-	vcpu->launched = 1;
 	r = kvm_handle_exit(kvm_run, vcpu);
 	if (r > 0) {
 		/* Give scheduler a chance to reschedule. */
-- 
1.5.2.3



* Re: [PATCH 2/2] KVM: Use the scheduler preemption hooks to make kvm preemptible
       [not found]     ` <1184169333786-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-11 20:10       ` Luca
       [not found]         ` <68676e00707111310v4862974am37dc7c723486b96c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Luca @ 2007-07-11 20:10 UTC (permalink / raw)
  To: Avi Kivity
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 7/11/07, Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> Current kvm disables preemption while the new virtualization registers are
> in use.  This of course is not very good for latency-sensitive workloads
> (one use of virtualization is to offload user interface and other
> latency-insensitive stuff to a container, so that it is easier to analyze
> the remaining workload).  This patch re-enables preemption for kvm;
> preemption is now only disabled when switching the registers in and out,
> and during the switch to guest mode and back.
[...]
> diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
> index 33fa28a..92644b5 100644
> --- a/drivers/kvm/Kconfig
> +++ b/drivers/kvm/Kconfig
> @@ -10,7 +10,7 @@ if VIRTUALIZATION
>
>  config KVM
>         tristate "Kernel-based Virtual Machine (KVM) support"
> -       depends on X86 && EXPERIMENTAL
> +       depends on X86 && EXPERIMENTAL && PREEMPT_HOOKS
>         depends on X86_CMPXCHG64 || 64BIT

select PREEMPT_HOOKS maybe?

Luca


* Re: [PATCH 1/2] SCHED: Generic hooks for trapping task preemption
       [not found]     ` <11841693333159-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-12  2:56       ` Rusty Russell
  2007-07-12  5:14       ` Rusty Russell
  1 sibling, 0 replies; 8+ messages in thread
From: Rusty Russell @ 2007-07-12  2:56 UTC (permalink / raw)
  To: Avi Kivity
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Wed, 2007-07-11 at 18:55 +0300, Avi Kivity wrote:
> +config PREEMPT_HOOKS
> +	bool
> +	depends on X86
> +	default y

Hmm, I would have thought that having CONFIG_KVM "select PREEMPT_HOOKS"
would be a little clearer.

> +static void fire_sched_in_preempt_hooks(struct task_struct *tsk)
> +{
> +	struct preempt_hook *hook;
> +	struct hlist_node *node;
> +
> +	hlist_for_each_entry(hook, node, &tsk->preempt_hooks, link)
> +		hook->ops->sched_in(hook, raw_smp_processor_id());
> +}

The old current/tsk conflation.  You hand in "tsk" here, but since
that's not handed through to the sched_in, it must be "current".  It's
just unfortunate that this generates better code...

But patch looks good.

Cheers,
Rusty.



* Re: [PATCH 1/2] SCHED: Generic hooks for trapping task preemption
       [not found]     ` <11841693333159-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-07-12  2:56       ` Rusty Russell
@ 2007-07-12  5:14       ` Rusty Russell
       [not found]         ` <1184217293.6005.754.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
  1 sibling, 1 reply; 8+ messages in thread
From: Rusty Russell @ 2007-07-12  5:14 UTC (permalink / raw)
  To: Avi Kivity
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Wed, 2007-07-11 at 18:55 +0300, Avi Kivity wrote:
> +	fire_sched_out_preempt_hooks(current);
>    	prepare_lock_switch(rq, next);
>  	prepare_arch_switch(next);

Damn, I just found a use for this in lguest.

Any chance of handing "next" to the sched_out hook so we can optimize
the lguest <-> lguest switch?

FYI, lguest turns off PGE globally when you load the module, because
turning it off on each guest entry v. slow.  With these hooks I can be
far smarter...
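
Something like this, say (hand-waving; 'next' handed down from
prepare_task_switch()):

	struct preempt_ops {
		void (*sched_in)(struct preempt_hook *hook, int cpu);
		void (*sched_out)(struct preempt_hook *hook,
				  struct task_struct *next);
	};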

Thanks!
Rusty.




* Re: [PATCH 2/2] KVM: Use the scheduler preemption hooks to make kvm preemptible
       [not found]         ` <68676e00707111310v4862974am37dc7c723486b96c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-07-12  5:43           ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2007-07-12  5:43 UTC (permalink / raw)
  To: Luca
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

Luca wrote:
> On 7/11/07, Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
>> Current kvm disables preemption while the new virtualization registers
>> are in use.  This of course is not very good for latency-sensitive
>> workloads (one use of virtualization is to offload user interface and
>> other latency-insensitive stuff to a container, so that it is easier to
>> analyze the remaining workload).  This patch re-enables preemption for
>> kvm; preemption is now only disabled when switching the registers in
>> and out, and during the switch to guest mode and back.
> [...]
>> diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
>> index 33fa28a..92644b5 100644
>> --- a/drivers/kvm/Kconfig
>> +++ b/drivers/kvm/Kconfig
>> @@ -10,7 +10,7 @@ if VIRTUALIZATION
>>
>>  config KVM
>>         tristate "Kernel-based Virtual Machine (KVM) support"
>> -       depends on X86 && EXPERIMENTAL
>> +       depends on X86 && EXPERIMENTAL && PREEMPT_HOOKS
>>         depends on X86_CMPXCHG64 || 64BIT
>
> select PREEMPT_HOOKS maybe?
>

Yes.  And default PREEMPT_HOOKS to n in the first patch.  I'll make both
these changes.
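
Roughly (untested; a bare bool defaults to n, and kvm selects the symbol
instead of depending on it):

config PREEMPT_HOOKS
	bool

config KVM
	tristate "Kernel-based Virtual Machine (KVM) support"
	depends on X86 && EXPERIMENTAL
	depends on X86_CMPXCHG64 || 64BIT
	select PREEMPT_HOOKS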


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.



* Re: [PATCH 1/2] SCHED: Generic hooks for trapping task preemption
       [not found]         ` <1184217293.6005.754.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2007-07-12  5:48           ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2007-07-12  5:48 UTC (permalink / raw)
  To: Rusty Russell
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

Rusty Russell wrote:
> On Wed, 2007-07-11 at 18:55 +0300, Avi Kivity wrote:
>   
>> +	fire_sched_out_preempt_hooks(current);
>>    	prepare_lock_switch(rq, next);
>>  	prepare_arch_switch(next);
>>     
>
> Damn, I just found a use for this in lguest.
>
> Any chance of handing "next" to the sched_out hook so we can optimize
> the lguest <-> lguest switch?
>
> FYI, lguest turns off PGE globally when you load the module, because
> turning it off on each guest entry v. slow.  With these hooks I can be
> far smarter...
>   

Sure thang.  Will be there in the next version.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

