* [PATCH 1/9] Unprotect a page if #PF happens during NMI injection.
@ 2009-05-05  8:14 Gleb Natapov
  2009-05-05  8:14 ` [PATCH 2/9] Do not allow interrupt injection from userspace if there is a pending event Gleb Natapov
                   ` (7 more replies)
  0 siblings, 8 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov

This is already done when a #PF happens during exception or interrupt
injection; do the same for NMI injection.
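
For illustration, a condensed sketch of the unified check (kvm_event_needs_reinjection is the helper added by this patch; the fault-handler context is simplified):

	/* A #PF taken while an exception, interrupt or NMI is being
	 * injected means the event must be re-injected on the next
	 * entry; unprotect the shadowed page so the delivery can
	 * complete instead of faulting again. */
	if (kvm_event_needs_reinjection(vcpu))
		kvm_mmu_unprotect_page_virt(vcpu, fault_address);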

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/svm.c |    3 +--
 arch/x86/kvm/vmx.c |    2 +-
 arch/x86/kvm/x86.h |    6 ++++++
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8f411ff..c1ef2b9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1090,8 +1090,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	if (npt_enabled)
 		svm_flush_tlb(&svm->vcpu);
 	else {
-		if (svm->vcpu.arch.interrupt.pending ||
-				svm->vcpu.arch.exception.pending)
+		if (kvm_event_needs_reinjection(&svm->vcpu))
 			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
 	}
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e8a5649..a9b30e6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
-		if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
+		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 39350b2..21203d4 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -30,4 +30,10 @@ static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
 		clear_bit(word_index, &vcpu->arch.irq_summary);
 	return irq;
 }
+
+static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+		vcpu->arch.nmi_injected;
+}
 #endif
-- 
1.6.2.1



* [PATCH 2/9] Do not allow interrupt injection from userspace if there is a pending event.
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-05  8:14 ` [PATCH 3/9] Remove irq_pending bitmap Gleb Natapov
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/x86.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2d7082c..12ab1cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3080,8 +3080,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 		kvm_run->ready_for_interrupt_injection = 1;
 	else
 		kvm_run->ready_for_interrupt_injection =
-					(kvm_arch_interrupt_allowed(vcpu) &&
-					 !kvm_cpu_has_interrupt(vcpu));
+			kvm_arch_interrupt_allowed(vcpu) &&
+			!kvm_cpu_has_interrupt(vcpu) &&
+			!kvm_event_needs_reinjection(vcpu);
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
-- 
1.6.2.1



* [PATCH 3/9] Remove irq_pending bitmap
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
  2009-05-05  8:14 ` [PATCH 2/9] Do not allow interrupt injection from userspace if there is a pending event Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-06  5:55   ` Sheng Yang
  2009-05-05  8:14 ` [PATCH 4/9] [SVM] skip_emulated_instruction() decode an instruction if size is not known Gleb Natapov
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov

Only one interrupt vector can be injected from the userspace irqchip at
any given time, so there is no need to store it in a bitmap. Put it into
the interrupt queue directly.
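
As a sketch of the invariant this relies on (field names as in the diffs below):

	/* With a userspace irqchip at most one external vector is in
	 * flight, so a pending flag plus a vector number replaces the
	 * whole irq_pending bitmap. */
	vcpu->arch.interrupt.pending = true;	/* kvm_queue_interrupt() */
	vcpu->arch.interrupt.nr = vector;

	/* ... and kvm_cpu_get_interrupt() simply returns interrupt.nr */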

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    2 --
 arch/x86/kvm/irq.c              |    4 ++--
 arch/x86/kvm/x86.c              |   38 +++++++++++---------------------------
 arch/x86/kvm/x86.h              |   12 ------------
 4 files changed, 13 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e680c3..cc892f5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -266,8 +266,6 @@ struct kvm_mmu {
 
 struct kvm_vcpu_arch {
 	u64 host_tsc;
-	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	/*
 	 * rip and regs accesses must go through
 	 * kvm_{register,rip}_{read,write} functions.
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 11c2757..96dfbb6 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -50,7 +50,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 	struct kvm_pic *s;
 
 	if (!irqchip_in_kernel(v->kvm))
-		return v->arch.irq_summary;
+		return v->arch.interrupt.pending;
 
 	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
 		if (kvm_apic_accept_pic_intr(v)) {
@@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 	int vector;
 
 	if (!irqchip_in_kernel(v->kvm))
-		return kvm_pop_irq(v);
+		return v->arch.interrupt.nr;
 
 	vector = kvm_get_apic_interrupt(v);	/* APIC */
 	if (vector == -1) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 12ab1cc..4596927 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1424,8 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 		return -ENXIO;
 	vcpu_load(vcpu);
 
-	set_bit(irq->irq, vcpu->arch.irq_pending);
-	set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
+	kvm_queue_interrupt(vcpu, irq->irq);
 
 	vcpu_put(vcpu);
 
@@ -3562,12 +3561,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->arch.shadow_efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
-	if (irqchip_in_kernel(vcpu->kvm))
-		memset(sregs->interrupt_bitmap, 0,
-		       sizeof sregs->interrupt_bitmap);
-	else
-		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
-		       sizeof sregs->interrupt_bitmap);
+	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
 
 	if (vcpu->arch.interrupt.pending)
 		set_bit(vcpu->arch.interrupt.nr,
@@ -4037,7 +4031,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
 	int mmu_reset_needed = 0;
-	int i, pending_vec, max_bits;
+	int pending_vec, max_bits;
 	struct descriptor_table dt;
 
 	vcpu_load(vcpu);
@@ -4079,24 +4073,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	if (!irqchip_in_kernel(vcpu->kvm)) {
-		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
-		       sizeof vcpu->arch.irq_pending);
-		vcpu->arch.irq_summary = 0;
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
-			if (vcpu->arch.irq_pending[i])
-				__set_bit(i, &vcpu->arch.irq_summary);
-	} else {
-		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
-		pending_vec = find_first_bit(
-			(const unsigned long *)sregs->interrupt_bitmap,
-			max_bits);
-		/* Only pending external irq is handled here */
-		if (pending_vec < max_bits) {
-			kvm_queue_interrupt(vcpu, pending_vec);
-			pr_debug("Set back pending irq %d\n", pending_vec);
-		}
-		kvm_pic_clear_isr_ack(vcpu->kvm);
+	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+	pending_vec = find_first_bit(
+		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
+	if (pending_vec < max_bits) {
+		kvm_queue_interrupt(vcpu, pending_vec);
+		pr_debug("Set back pending irq %d\n", pending_vec);
+		if (irqchip_in_kernel(vcpu->kvm))
+			kvm_pic_clear_isr_ack(vcpu->kvm);
 	}
 
 	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 21203d4..c1f1a8c 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -19,18 +19,6 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 	vcpu->arch.interrupt.pending = false;
 }
 
-static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
-{
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	return irq;
-}
-
 static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
-- 
1.6.2.1



* [PATCH 4/9] [SVM] skip_emulated_instruction() decode an instruction if size is not known
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
  2009-05-05  8:14 ` [PATCH 2/9] Do not allow interrupt injection from userspace if there is a pending event Gleb Natapov
  2009-05-05  8:14 ` [PATCH 3/9] Remove irq_pending bitmap Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-05  8:14 ` [PATCH 5/9] [VMX] Do not re-execute INTn instruction Gleb Natapov
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/svm.c |   11 +++++------
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c1ef2b9..14cdfce 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -207,7 +207,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		printk(KERN_DEBUG "%s: NOP\n", __func__);
+		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+				EMULATE_DONE)
+			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
 	}
 	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@@ -1836,11 +1838,8 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	if (reason != TASK_SWITCH_GATE ||
 	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
 	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
-	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
-		if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0,
-					EMULTYPE_SKIP) != EMULATE_DONE)
-			return 0;
-	}
+	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
+		skip_emulated_instruction(&svm->vcpu);
 
 	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
-- 
1.6.2.1



* [PATCH 5/9] [VMX] Do not re-execute INTn instruction.
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
                   ` (2 preceding siblings ...)
  2009-05-05  8:14 ` [PATCH 4/9] [SVM] skip_emulated_instruction() decode an instruction if size is not known Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-06  6:57   ` Sheng Yang
                     ` (2 more replies)
  2009-05-05  8:14 ` [PATCH 6/9] IRQ/NMI window should always be requested Gleb Natapov
                   ` (3 subsequent siblings)
  7 siblings, 3 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov

Re-inject the event instead; this is what Intel suggests. Also use the
correct instruction length when re-injecting a soft fault/interrupt.
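
The core of the mechanism, condensed from the VMX hunks below (a sketch, not the complete patch):

	/* On vmexit, remember how long the soft event's instruction was: */
	vmx->vcpu.arch.event_exit_inst_len =
		vmcs_read32(VM_EXIT_INSTRUCTION_LEN);

	/* On re-injection, hand the length back so the guest's RIP is
	 * advanced past the INTn/INT3 instead of re-executing it: */
	uint32_t intr = irq | INTR_INFO_VALID_MASK;
	if (soft) {
		intr |= INTR_TYPE_SOFT_INTR;
		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
			     vmx->vcpu.arch.event_exit_inst_len);
	} else
		intr |= INTR_TYPE_EXT_INTR;
	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);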

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    5 ++++-
 arch/x86/kvm/svm.c              |    6 +++---
 arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
 arch/x86/kvm/x86.c              |   13 ++++++++-----
 arch/x86/kvm/x86.h              |    9 ++++++++-
 5 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cc892f5..fea0429 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
 	struct kvm_pio_request pio;
 	void *pio_data;
 
+	u8 event_exit_inst_len;
+
 	struct kvm_queued_exception {
 		bool pending;
 		bool has_error_code;
@@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
 
 	struct kvm_queued_interrupt {
 		bool pending;
+		bool soft;
 		u8 nr;
 	} interrupt;
 
@@ -510,7 +513,7 @@ struct kvm_x86_ops {
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
-	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 14cdfce..d5173a2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2284,7 +2284,7 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	case SVM_EXITINTINFO_TYPE_EXEPT:
 		/* In case of software exception do not reinject an exception
 		   vector, but re-execute the instruction instead */
-		if (vector == BP_VECTOR || vector == OF_VECTOR)
+		if (kvm_exception_is_soft(vector))
 			break;
 		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
 			u32 err = svm->vmcb->control.exit_int_info_err;
@@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 			kvm_queue_exception(&svm->vcpu, vector);
 		break;
 	case SVM_EXITINTINFO_TYPE_INTR:
-		kvm_queue_interrupt(&svm->vcpu, vector);
+		kvm_queue_interrupt(&svm->vcpu, vector, false);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a9b30e6..092a3ee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		return;
 	}
 
-	if (nr == BP_VECTOR || nr == OF_VECTOR) {
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+	if (kvm_exception_is_soft(nr)) {
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
 		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
 	} else
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -2429,9 +2430,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
+static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	uint32_t intr;
 
 	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
@@ -2446,8 +2448,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
 		return;
 	}
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+	intr = irq | INTR_INFO_VALID_MASK;
+	if (soft) {
+		intr |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
+	} else
+		intr |= INTR_TYPE_EXT_INTR;
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -3008,6 +3016,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 					      GUEST_INTR_STATE_NMI);
 			break;
 		case INTR_TYPE_EXT_INTR:
+		case INTR_TYPE_SOFT_INTR:
 			kvm_clear_interrupt_queue(vcpu);
 			break;
 		case INTR_TYPE_HARD_EXCEPTION:
@@ -3279,16 +3288,22 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_NMI);
 		break;
-	case INTR_TYPE_HARD_EXCEPTION:
 	case INTR_TYPE_SOFT_EXCEPTION:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
 			kvm_queue_exception_e(&vmx->vcpu, vector, err);
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
 		break;
+	case INTR_TYPE_SOFT_INTR:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	case INTR_TYPE_EXT_INTR:
-		kvm_queue_interrupt(&vmx->vcpu, vector);
+		kvm_queue_interrupt(&vmx->vcpu, vector,
+			type == INTR_TYPE_SOFT_INTR);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4596927..023842b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1424,7 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 		return -ENXIO;
 	vcpu_load(vcpu);
 
-	kvm_queue_interrupt(vcpu, irq->irq);
+	kvm_queue_interrupt(vcpu, irq->irq, false);
 
 	vcpu_put(vcpu);
 
@@ -3136,7 +3136,8 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 	}
 
 	if (vcpu->arch.interrupt.pending) {
-		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
+				     vcpu->arch.interrupt.soft);
 		return;
 	}
 
@@ -3149,8 +3150,10 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 		}
 	} else if (kvm_cpu_has_interrupt(vcpu)) {
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+					    false);
+			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
+					     false);
 		}
 	}
 }
@@ -4077,7 +4080,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	pending_vec = find_first_bit(
 		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
 	if (pending_vec < max_bits) {
-		kvm_queue_interrupt(vcpu, pending_vec);
+		kvm_queue_interrupt(vcpu, pending_vec, false);
 		pr_debug("Set back pending irq %d\n", pending_vec);
 		if (irqchip_in_kernel(vcpu->kvm))
 			kvm_pic_clear_isr_ack(vcpu->kvm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c1f1a8c..2817c86 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 	vcpu->arch.exception.pending = false;
 }
 
-static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
+static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
+	bool soft)
 {
 	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.soft = soft;
 	vcpu->arch.interrupt.nr = vector;
 }
 
@@ -24,4 +26,9 @@ static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
 		vcpu->arch.nmi_injected;
 }
+
+static inline bool kvm_exception_is_soft(unsigned int nr)
+{
+	return (nr == BP_VECTOR) || (nr == OF_VECTOR) || (nr == DB_VECTOR);
+}
 #endif
-- 
1.6.2.1



* [PATCH 6/9] IRQ/NMI window should always be requested.
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
                   ` (3 preceding siblings ...)
  2009-05-05  8:14 ` [PATCH 5/9] [VMX] Do not re-execute INTn instruction Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-05  8:14 ` [PATCH 7/9] Drop interrupt shadow when single stepping should be done only on VMX Gleb Natapov
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov

Currently they are not requested if there is a pending exception.
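
After the change, vcpu_enter_guest() evaluates the window exits unconditionally after the injection attempt, roughly (condensed from the hunks below):

	inject_pending_irq(vcpu, kvm_run);	/* or reinject an exception */

	/* enable NMI/IRQ window open exits if needed -- now done even
	 * when an exception is pending */
	if (vcpu->arch.nmi_pending)
		kvm_x86_ops->enable_nmi_window(vcpu);
	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
		kvm_x86_ops->enable_irq_window(vcpu);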

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/x86.c |   30 ++++++++++++------------------
 1 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 023842b..bce49da 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3127,8 +3127,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
-static void inject_irq(struct kvm_vcpu *vcpu)
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		kvm_x86_ops->drop_interrupt_shadow(vcpu);
+
 	/* try to reinject previous events if any */
 	if (vcpu->arch.nmi_injected) {
 		kvm_x86_ops->set_nmi(vcpu);
@@ -3158,26 +3161,11 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		kvm_x86_ops->drop_interrupt_shadow(vcpu);
-
-	inject_irq(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		kvm_x86_ops->enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		kvm_x86_ops->enable_irq_window(vcpu);
-}
-
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
 
 	if (vcpu->requests)
 		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3235,6 +3223,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	else
 		inject_pending_irq(vcpu, kvm_run);
 
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		kvm_x86_ops->enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		kvm_x86_ops->enable_irq_window(vcpu);
+
 	if (kvm_lapic_enabled(vcpu)) {
 		if (!vcpu->arch.apic->vapic_addr)
 			update_cr8_intercept(vcpu);
-- 
1.6.2.1



* [PATCH 7/9] Drop interrupt shadow when single stepping should be done only on VMX.
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
                   ` (4 preceding siblings ...)
  2009-05-05  8:14 ` [PATCH 6/9] IRQ/NMI window should always be requested Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-05  8:14 ` [PATCH 8/9] Replace pending exception by PF if it happens serially Gleb Natapov
  2009-05-05  8:14 ` [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before Gleb Natapov
  7 siblings, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov

The problem exists only on VMX. Also, we currently skip this step if
there is a pending exception; the patch fixes this too.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/vmx.c |    8 ++++++++
 arch/x86/kvm/x86.c |    3 ---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 092a3ee..44647e6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3361,6 +3361,14 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
+	/* When single-stepping over STI and MOV SS, we must clear the
+	 * corresponding interruptibility bits in the guest state. Otherwise
+	 * vmentry fails as it then expects bit 14 (BS) in pending debug
+	 * exceptions being set, but that's not correct for the guest debugging
+	 * case. */
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vmx_drop_interrupt_shadow(vcpu);
+
 	/*
 	 * Loading guest fpu may have cleared host cr0.ts
 	 */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bce49da..4ba00ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3129,9 +3129,6 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 
 static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		kvm_x86_ops->drop_interrupt_shadow(vcpu);
-
 	/* try to reinject previous events if any */
 	if (vcpu->arch.nmi_injected) {
 		kvm_x86_ops->set_nmi(vcpu);
-- 
1.6.2.1



* [PATCH 8/9] Replace pending exception by PF if it happens serially.
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
                   ` (5 preceding siblings ...)
  2009-05-05  8:14 ` [PATCH 7/9] Drop interrupt shadow when single stepping should be done only on VMX Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-05  8:14 ` [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before Gleb Natapov
  7 siblings, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov

Replace the previous exception with a new one, in the hope that
re-executing the instruction will regenerate the lost exception.
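
The escalation policy, sketched (it mirrors the switch in the hunk below):

	switch (vcpu->arch.exception.nr) {
	case DF_VECTOR:		/* #PF on top of #DF: triple fault */
		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
		/* fall through */
	case PF_VECTOR:		/* #PF on top of #PF: promote to #DF */
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
		return;
	default:		/* otherwise drop the old exception and
				   deliver the new #PF */
		vcpu->arch.exception.pending = false;
		break;
	}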

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/x86.c |   19 ++++++++++++-------
 1 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4ba00ab..a869b89 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -177,16 +177,21 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	++vcpu->stat.pf_guest;
 
 	if (vcpu->arch.exception.pending) {
-		if (vcpu->arch.exception.nr == PF_VECTOR) {
-			printk(KERN_DEBUG "kvm: inject_page_fault:"
-					" double fault 0x%lx\n", addr);
-			vcpu->arch.exception.nr = DF_VECTOR;
-			vcpu->arch.exception.error_code = 0;
-		} else if (vcpu->arch.exception.nr == DF_VECTOR) {
+		switch(vcpu->arch.exception.nr) {
+		case DF_VECTOR:
 			/* triple fault -> shutdown */
 			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+		case PF_VECTOR:
+			vcpu->arch.exception.nr = DF_VECTOR;
+			vcpu->arch.exception.error_code = 0;
+			return;
+		default:
+			/* replace previous exception with a new one in a hope
+			   that instruction re-execution will regenerate lost
+			   exception */
+			vcpu->arch.exception.pending = false;
+			break;
 		}
-		return;
 	}
 	vcpu->arch.cr2 = addr;
 	kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
-- 
1.6.2.1



* [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before.
  2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
                   ` (6 preceding siblings ...)
  2009-05-05  8:14 ` [PATCH 8/9] Replace pending exception by PF if it happens serially Gleb Natapov
@ 2009-05-05  8:14 ` Gleb Natapov
  2009-05-05  8:45   ` Jan Kiszka
  2009-05-05  9:47   ` Gleb Natapov
  7 siblings, 2 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  8:14 UTC
  To: avi; +Cc: kvm, Gleb Natapov

If an NMI is received while another NMI is being handled, it should be
injected immediately after the IRET from the previous NMI handler. But
SVM intercepts IRET before the instruction executes, so we cannot inject
the pending NMI at that point, and there is no way to request an exit
when the NMI window opens. This patch fixes the SVM code to open the NMI
window after IRET by single-stepping over the IRET instruction.
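
The resulting flow, reconstructed from the hunks below as a sketch:

	/* NMI #1 injected                  -> HF_NMI_MASK set
	 * NMI #2 arrives in the handler    -> stays pending, window requested
	 * enable_nmi_window(): HF_NMI_MASK set, HF_IRET_MASK not yet
	 *   -> do nothing; the IRET intercept will cause a vmexit
	 * IRET intercepted (before it executes) -> HF_IRET_MASK set, IRET
	 *   intercept dropped; enable_nmi_window() now arms single-stepping
	 *   (TF|RF set, #DB intercepted)
	 * IRET executes, then #DB vmexit   -> single-step state torn down,
	 *   svm_complete_interrupts() clears HF_NMI_MASK | HF_IRET_MASK
	 * NMI window is open               -> NMI #2 injected */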

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    2 +
 arch/x86/kvm/svm.c              |   62 +++++++++++++++++++++++++++++++-------
 2 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fea0429..bcd0857 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
 	unsigned int time_offset;
 	struct page *time_page;
 
+	bool singlestep; /* guest is single stepped by KVM */
 	bool nmi_pending;
 	bool nmi_injected;
 
@@ -772,6 +773,7 @@ enum {
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
 #define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d5173a2..bf10991 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -933,15 +933,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void update_db_intercept(struct kvm_vcpu *vcpu)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+
+	if (vcpu->arch.singlestep)
+		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
 		if (vcpu->guest_debug &
 		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -952,6 +953,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 				1 << BP_VECTOR;
 	} else
 		vcpu->guest_debug = 0;
+}
+
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+{
+	int old_debug = vcpu->guest_debug;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->guest_debug = dbg->control;
+
+	update_db_intercept(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@@ -1101,14 +1112,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (!(svm->vcpu.guest_debug &
-	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+		!svm->vcpu.arch.singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
-	kvm_run->exit_reason = KVM_EXIT_DEBUG;
-	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
-	kvm_run->debug.arch.exception = DB_VECTOR;
-	return 0;
+
+	if (svm->vcpu.arch.singlestep) {
+		svm->vcpu.arch.singlestep = false;
+		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+			svm->vmcb->save.rflags &=
+				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+		update_db_intercept(to_svm(svm));
+	}
+
+	if (svm->vcpu.guest_debug &
+	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		kvm_run->debug.arch.pc =
+			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		return 0;
+	}
+
+	return 1;
 }
 
 static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1855,7 +1882,7 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	++svm->vcpu.stat.nmi_window_exits;
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
-	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+	svm->vcpu.arch.hflags |= HF_IRET_MASK;
 	return 1;
 }
 
@@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
-		enable_irq_window(vcpu);
+	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
+	    == HF_NMI_MASK)
+		return; /* IRET will cause a vm exit */
+
+	/* Something prevents NMI from being injected. Single step over
+	   possible problem (IRET or exception injection or interrupt
+	   shadow) */
+	vcpu->arch.singlestep = true;
+	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+	update_db_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2375,6 +2410,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	int type;
 	u32 exitintinfo = svm->vmcb->control.exit_int_info;
 
+	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+
 	svm->vcpu.arch.nmi_injected = false;
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
-- 
1.6.2.1



* Re: [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before.
  2009-05-05  8:14 ` [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before Gleb Natapov
@ 2009-05-05  8:45   ` Jan Kiszka
  2009-05-05  9:03     ` Gleb Natapov
  2009-05-05  9:47   ` Gleb Natapov
  1 sibling, 1 reply; 21+ messages in thread
From: Jan Kiszka @ 2009-05-05  8:45 UTC
  To: Gleb Natapov; +Cc: avi, kvm

Gleb Natapov wrote:
> If NMI is received during handling of another NMI it should be injected
> immediately after IRET from previous NMI handler, but SVM intercept IRET
> before instruction execution so we can't inject pending NMI at this
> point and there is not way to request exit when NMI window opens. This
> patch fix SVM code to open NMI window after IRET by single stepping over
> IRET instruction.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    2 +
>  arch/x86/kvm/svm.c              |   62 +++++++++++++++++++++++++++++++-------
>  2 files changed, 52 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fea0429..bcd0857 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
>  	unsigned int time_offset;
>  	struct page *time_page;
>  
> +	bool singlestep; /* guest is single stepped by KVM */
>  	bool nmi_pending;
>  	bool nmi_injected;
>  
> @@ -772,6 +773,7 @@ enum {
>  #define HF_HIF_MASK		(1 << 1)
>  #define HF_VINTR_MASK		(1 << 2)
>  #define HF_NMI_MASK		(1 << 3)
> +#define HF_IRET_MASK		(1 << 4)
>  
>  /*
>   * Hardware virtualization extension instructions may fault if a
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d5173a2..bf10991 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -933,15 +933,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
>  
>  }
>  
> -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
> +static void update_db_intercept(struct kvm_vcpu *vcpu)
>  {
> -	int old_debug = vcpu->guest_debug;
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> -	vcpu->guest_debug = dbg->control;
> -
>  	svm->vmcb->control.intercept_exceptions &=
>  		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
> +
> +	if (vcpu->arch.singlestep)
> +		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
> +
>  	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
>  		if (vcpu->guest_debug &
>  		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
> @@ -952,6 +953,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
>  				1 << BP_VECTOR;
>  	} else
>  		vcpu->guest_debug = 0;
> +}
> +
> +static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
> +{
> +	int old_debug = vcpu->guest_debug;
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	vcpu->guest_debug = dbg->control;
> +
> +	update_db_intercept(vcpu);
>  
>  	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
>  		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
> @@ -1101,14 +1112,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
>  	if (!(svm->vcpu.guest_debug &
> -	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
> +	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
> +		!svm->vcpu.arch.singlestep) {
>  		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
>  		return 1;
>  	}
> -	kvm_run->exit_reason = KVM_EXIT_DEBUG;
> -	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
> -	kvm_run->debug.arch.exception = DB_VECTOR;
> -	return 0;
> +
> +	if (svm->vcpu.arch.singlestep) {
> +		svm->vcpu.arch.singlestep = false;
> +		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
> +			svm->vmcb->save.rflags &=
> +				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
> +		update_db_intercept(to_svm(svm));
> +	}
> +
> +	if (svm->vcpu.guest_debug &
> +	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
> +		kvm_run->exit_reason = KVM_EXIT_DEBUG;
> +		kvm_run->debug.arch.pc =
> +			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
> +		kvm_run->debug.arch.exception = DB_VECTOR;
> +		return 0;
> +	}
> +
> +	return 1;
>  }
>  
>  static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> @@ -1855,7 +1882,7 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
>  	++svm->vcpu.stat.nmi_window_exits;
>  	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> -	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
> +	svm->vcpu.arch.hflags |= HF_IRET_MASK;
>  	return 1;
>  }
>  
> @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> -		enable_irq_window(vcpu);
> +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
> +	    == HF_NMI_MASK)
> +		return; /* IRET will cause a vm exit */
> +
> +	/* Something prevents NMI from been injected. Single step over
> +	   possible problem (IRET or exception injection or interrupt
> +	   shadow) */
> +	vcpu->arch.singlestep = true;
> +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);

Can you single-step like this out of an IRQ handler? I mean, IRET will
restore the flags from the stack, and those settings should be
overwritten. Or am I missing something?

> +	update_db_intercept(vcpu);
>  }
>  
>  static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
> @@ -2375,6 +2410,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  	int type;
>  	u32 exitintinfo = svm->vmcb->control.exit_int_info;
>  
> +	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
> +		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
> +
>  	svm->vcpu.arch.nmi_injected = false;
>  	kvm_clear_exception_queue(&svm->vcpu);
>  	kvm_clear_interrupt_queue(&svm->vcpu);

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux


* Re: [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before.
  2009-05-05  8:45   ` Jan Kiszka
@ 2009-05-05  9:03     ` Gleb Natapov
  2009-05-05  9:25       ` Jan Kiszka
  0 siblings, 1 reply; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  9:03 UTC
  To: Jan Kiszka; +Cc: avi, kvm

On Tue, May 05, 2009 at 10:45:20AM +0200, Jan Kiszka wrote:
> > @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
> >  {
> >  	struct vcpu_svm *svm = to_svm(vcpu);
> >  
> > -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> > -		enable_irq_window(vcpu);
> > +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
> > +	    == HF_NMI_MASK)
> > +		return; /* IRET will cause a vm exit */
> > +
> > +	/* Something prevents NMI from been injected. Single step over
> > +	   possible problem (IRET or exception injection or interrupt
> > +	   shadow) */
> > +	vcpu->arch.singlestep = true;
> > +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
> 
> Can you single-step like this out of an IRQ handler? I mean, IRET will
> restore the flags from the stack, and those settings should be
> overwritten. Or am I missing something?
> 
It seems to be working :) Shouldn't the CPU check the single-step flag
before executing IRET, and thus use the old flags value? It would be
interesting to check what the rflags value is immediately after IRET.

--
			Gleb.


* Re: [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before.
  2009-05-05  9:03     ` Gleb Natapov
@ 2009-05-05  9:25       ` Jan Kiszka
  2009-05-05  9:32         ` Gleb Natapov
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Kiszka @ 2009-05-05  9:25 UTC
  To: Gleb Natapov; +Cc: avi, kvm

Gleb Natapov wrote:
> On Tue, May 05, 2009 at 10:45:20AM +0200, Jan Kiszka wrote:
>>> @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
>>>  {
>>>  	struct vcpu_svm *svm = to_svm(vcpu);
>>>  
>>> -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
>>> -		enable_irq_window(vcpu);
>>> +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
>>> +	    == HF_NMI_MASK)
>>> +		return; /* IRET will cause a vm exit */
>>> +
>>> +	/* Something prevents NMI from been injected. Single step over
>>> +	   possible problem (IRET or exception injection or interrupt
>>> +	   shadow) */
>>> +	vcpu->arch.singlestep = true;
>>> +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
>> Can you single-step like this out of an IRQ handler? I mean, IRET will
>> restore the flags from the stack, and those settings should be
>> overwritten. Or am I missing something?
>>
> It seems to be working :) Shouldn't CPU checks single step before
> executing IRET and thus using old flags value? It is interesting to
> check what rflag value is immediately after IRET.

Hmm, I guess I have to re-read some manuals. But regarding
rflags-after-IRET, I think it should be cleared due to the restore from
the stack.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux


* Re: [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before.
  2009-05-05  9:25       ` Jan Kiszka
@ 2009-05-05  9:32         ` Gleb Natapov
  0 siblings, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  9:32 UTC
  To: Jan Kiszka; +Cc: avi, kvm

On Tue, May 05, 2009 at 11:25:13AM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Tue, May 05, 2009 at 10:45:20AM +0200, Jan Kiszka wrote:
> >>> @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
> >>>  {
> >>>  	struct vcpu_svm *svm = to_svm(vcpu);
> >>>  
> >>> -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> >>> -		enable_irq_window(vcpu);
> >>> +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
> >>> +	    == HF_NMI_MASK)
> >>> +		return; /* IRET will cause a vm exit */
> >>> +
> >>> +	/* Something prevents NMI from been injected. Single step over
> >>> +	   possible problem (IRET or exception injection or interrupt
> >>> +	   shadow) */
> >>> +	vcpu->arch.singlestep = true;
> >>> +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
> >> Can you single-step like this out of an IRQ handler? I mean, IRET will
> >> restore the flags from the stack, and those settings should be
> >> overwritten. Or am I missing something?
> >>
> > It seems to be working :) Shouldn't CPU checks single step before
> > executing IRET and thus using old flags value? It is interesting to
> > check what rflag value is immediately after IRET.
> 
> Hmm, guess I have to re-read some manuals. But regarding
> rflags-after-iret, I think it should be cleared due to that restoring
> from the stack.
> 
Just re-tested this once more. #DB is intercepted after IRET, and TF/RF
are already cleared.

--
			Gleb.


* Re: [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before.
  2009-05-05  8:14 ` [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before Gleb Natapov
  2009-05-05  8:45   ` Jan Kiszka
@ 2009-05-05  9:47   ` Gleb Natapov
  1 sibling, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-05  9:47 UTC
  To: avi; +Cc: kvm

I noticed a small bug in the previous patch. Use this one instead.
(Change update_db_intercept(to_svm(svm)) -> update_db_intercept(&svm->vcpu).)


If an NMI is received while another NMI is being handled, it should be
injected immediately after the IRET from the previous NMI handler. But
SVM intercepts IRET before the instruction executes, so we cannot inject
the pending NMI at that point, and there is no way to request an exit
when the NMI window opens. This patch fixes the SVM code to open the NMI
window after IRET by single-stepping over the IRET instruction.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fea0429..bcd0857 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
 	unsigned int time_offset;
 	struct page *time_page;
 
+	bool singlestep; /* guest is single stepped by KVM */
 	bool nmi_pending;
 	bool nmi_injected;
 
@@ -772,6 +773,7 @@ enum {
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
 #define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d5173a2..5c00258 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -933,15 +933,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void update_db_intercept(struct kvm_vcpu *vcpu)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+
+	if (vcpu->arch.singlestep)
+		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
 		if (vcpu->guest_debug &
 		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -952,6 +953,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 				1 << BP_VECTOR;
 	} else
 		vcpu->guest_debug = 0;
+}
+
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+{
+	int old_debug = vcpu->guest_debug;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->guest_debug = dbg->control;
+
+	update_db_intercept(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@@ -1101,14 +1112,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (!(svm->vcpu.guest_debug &
-	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+		!svm->vcpu.arch.singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
-	kvm_run->exit_reason = KVM_EXIT_DEBUG;
-	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
-	kvm_run->debug.arch.exception = DB_VECTOR;
-	return 0;
+
+	if (svm->vcpu.arch.singlestep) {
+		svm->vcpu.arch.singlestep = false;
+		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+			svm->vmcb->save.rflags &=
+				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+		update_db_intercept(&svm->vcpu);
+	}
+
+	if (svm->vcpu.guest_debug &
+	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		kvm_run->debug.arch.pc =
+			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		return 0;
+	}
+
+	return 1;
 }
 
 static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1855,7 +1882,7 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	++svm->vcpu.stat.nmi_window_exits;
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
-	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+	svm->vcpu.arch.hflags |= HF_IRET_MASK;
 	return 1;
 }
 
@@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
-		enable_irq_window(vcpu);
+	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
+	    == HF_NMI_MASK)
+		return; /* IRET will cause a vm exit */
+
+	/* Something prevents NMI from being injected. Single step over
+	   possible problem (IRET or exception injection or interrupt
+	   shadow) */
+	vcpu->arch.singlestep = true;
+	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+	update_db_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2375,6 +2410,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	int type;
 	u32 exitintinfo = svm->vmcb->control.exit_int_info;
 
+	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+
 	svm->vcpu.arch.nmi_injected = false;
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
--
			Gleb.


* Re: [PATCH 3/9] Remove irq_pending bitmap
  2009-05-05  8:14 ` [PATCH 3/9] Remove irq_pending bitmap Gleb Natapov
@ 2009-05-06  5:55   ` Sheng Yang
  2009-05-06  6:50     ` Sheng Yang
  0 siblings, 1 reply; 21+ messages in thread
From: Sheng Yang @ 2009-05-06  5:55 UTC
  To: kvm; +Cc: Gleb Natapov, avi

On Tuesday 05 May 2009 16:14:29 Gleb Natapov wrote:
> Only one interrupt vector can be injected from userspace irqchip at
> any given time so no need to store it in a bitmap. Put it into interrupt
> queue directly.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    2 --
>  arch/x86/kvm/irq.c              |    4 ++--
>  arch/x86/kvm/x86.c              |   38 +++++++++++---------------------------
>  arch/x86/kvm/x86.h              |   12 ------------
>  4 files changed, 13 insertions(+), 43 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h index 8e680c3..cc892f5 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -266,8 +266,6 @@ struct kvm_mmu {
>
>  struct kvm_vcpu_arch {
>  	u64 host_tsc;
> -	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
> -	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
>  	/*
>  	 * rip and regs accesses must go through
>  	 * kvm_{register,rip}_{read,write} functions.
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 11c2757..96dfbb6 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -50,7 +50,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  	struct kvm_pic *s;
>
>  	if (!irqchip_in_kernel(v->kvm))
> -		return v->arch.irq_summary;
> +		return v->arch.interrupt.pending;
>
>  	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
>  		if (kvm_apic_accept_pic_intr(v)) {
> @@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
>  	int vector;
>
>  	if (!irqchip_in_kernel(v->kvm))
> -		return kvm_pop_irq(v);
> +		return v->arch.interrupt.nr;
>
>  	vector = kvm_get_apic_interrupt(v);	/* APIC */
>  	if (vector == -1) {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 12ab1cc..4596927 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1424,8 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu
> *vcpu, return -ENXIO;
>  	vcpu_load(vcpu);
>
> -	set_bit(irq->irq, vcpu->arch.irq_pending);
> -	set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
> +	kvm_queue_interrupt(vcpu, irq->irq);
>
>  	vcpu_put(vcpu);
>
> @@ -3562,12 +3561,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu
> *vcpu, sregs->efer = vcpu->arch.shadow_efer;
>  	sregs->apic_base = kvm_get_apic_base(vcpu);
>
> -	if (irqchip_in_kernel(vcpu->kvm))
> -		memset(sregs->interrupt_bitmap, 0,
> -		       sizeof sregs->interrupt_bitmap);

? When did we discard the saving of pending interrupt for irqchip_in_kernel?

> -	else
> -		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
> -		       sizeof sregs->interrupt_bitmap);
> +	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);

No need to save any pending interrupts? Did I miss anything?

>  	if (vcpu->arch.interrupt.pending)
>  		set_bit(vcpu->arch.interrupt.nr,
> @@ -4037,7 +4031,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu
> *vcpu, struct kvm_sregs *sregs)
>  {
>  	int mmu_reset_needed = 0;
> -	int i, pending_vec, max_bits;
> +	int pending_vec, max_bits;
>  	struct descriptor_table dt;
>
>  	vcpu_load(vcpu);
> @@ -4079,24 +4073,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu
> *vcpu, if (mmu_reset_needed)
>  		kvm_mmu_reset_context(vcpu);
>
> -	if (!irqchip_in_kernel(vcpu->kvm)) {
> -		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
> -		       sizeof vcpu->arch.irq_pending);
> -		vcpu->arch.irq_summary = 0;
> -		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
> -			if (vcpu->arch.irq_pending[i])
> -				__set_bit(i, &vcpu->arch.irq_summary);
> -	} else {
> -		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
> -		pending_vec = find_first_bit(
> -			(const unsigned long *)sregs->interrupt_bitmap,
> -			max_bits);
> -		/* Only pending external irq is handled here */
> -		if (pending_vec < max_bits) {
> -			kvm_queue_interrupt(vcpu, pending_vec);
> -			pr_debug("Set back pending irq %d\n", pending_vec);
> -		}
> -		kvm_pic_clear_isr_ack(vcpu->kvm);
> +	max_bits = (sizeof sregs->interrupt_bitmap) << 3;

If interrupt_bitmap is always zero as above, why do we still have
this... for compatibility?

-- 
regards
Yang, Sheng

> +	pending_vec = find_first_bit(
> +		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
> +	if (pending_vec < max_bits) {
> +		kvm_queue_interrupt(vcpu, pending_vec);
> +		pr_debug("Set back pending irq %d\n", pending_vec);
> +		if (irqchip_in_kernel(vcpu->kvm))
> +			kvm_pic_clear_isr_ack(vcpu->kvm);
>  	}
>
>  	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 21203d4..c1f1a8c 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -19,18 +19,6 @@ static inline void kvm_clear_interrupt_queue(struct
> kvm_vcpu *vcpu) vcpu->arch.interrupt.pending = false;
>  }
>
> -static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
> -{
> -	int word_index = __ffs(vcpu->arch.irq_summary);
> -	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
> -	int irq = word_index * BITS_PER_LONG + bit_index;
> -
> -	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
> -	if (!vcpu->arch.irq_pending[word_index])
> -		clear_bit(word_index, &vcpu->arch.irq_summary);
> -	return irq;
> -}
> -
>  static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
>  {
>  	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||




* Re: [PATCH 3/9] Remove irq_pending bitmap
  2009-05-06  5:55   ` Sheng Yang
@ 2009-05-06  6:50     ` Sheng Yang
  0 siblings, 0 replies; 21+ messages in thread
From: Sheng Yang @ 2009-05-06  6:50 UTC
  To: kvm; +Cc: Gleb Natapov, avi

On Wednesday 06 May 2009 13:55:44 Sheng Yang wrote:
> On Tuesday 05 May 2009 16:14:29 Gleb Natapov wrote:
> > Only one interrupt vector can be injected from userspace irqchip at
> > any given time so no need to store it in a bitmap. Put it into interrupt
> > queue directly.
> >
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |    2 --
> >  arch/x86/kvm/irq.c              |    4 ++--
> >  arch/x86/kvm/x86.c              |   38 +++++++++++---------------------------
> >  arch/x86/kvm/x86.h              |   12 ------------
> >  4 files changed, 13 insertions(+), 43 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index 8e680c3..cc892f5 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -266,8 +266,6 @@ struct kvm_mmu {
> >
> >  struct kvm_vcpu_arch {
> >  	u64 host_tsc;
> > -	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
> > -	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
> >  	/*
> >  	 * rip and regs accesses must go through
> >  	 * kvm_{register,rip}_{read,write} functions.
> > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > index 11c2757..96dfbb6 100644
> > --- a/arch/x86/kvm/irq.c
> > +++ b/arch/x86/kvm/irq.c
> > @@ -50,7 +50,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> >  	struct kvm_pic *s;
> >
> >  	if (!irqchip_in_kernel(v->kvm))
> > -		return v->arch.irq_summary;
> > +		return v->arch.interrupt.pending;
> >
> >  	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
> >  		if (kvm_apic_accept_pic_intr(v)) {
> > @@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
> >  	int vector;
> >
> >  	if (!irqchip_in_kernel(v->kvm))
> > -		return kvm_pop_irq(v);
> > +		return v->arch.interrupt.nr;
> >
> >  	vector = kvm_get_apic_interrupt(v);	/* APIC */
> >  	if (vector == -1) {
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 12ab1cc..4596927 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1424,8 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu
> > *vcpu, return -ENXIO;
> >  	vcpu_load(vcpu);
> >
> > -	set_bit(irq->irq, vcpu->arch.irq_pending);
> > -	set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
> > +	kvm_queue_interrupt(vcpu, irq->irq);
> >
> >  	vcpu_put(vcpu);
> >
> > @@ -3562,12 +3561,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu
> > *vcpu, sregs->efer = vcpu->arch.shadow_efer;
> >  	sregs->apic_base = kvm_get_apic_base(vcpu);
> >
> > -	if (irqchip_in_kernel(vcpu->kvm))
> > -		memset(sregs->interrupt_bitmap, 0,
> > -		       sizeof sregs->interrupt_bitmap);
>
> ? When did we discard saving the pending interrupt for
> irqchip_in_kernel?

Oh, sorry. Please ignore this.

I finally realized I had missed the latest update. :(

-- 
regards
Yang, Sheng

>
> > -	else
> > -		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
> > -		       sizeof sregs->interrupt_bitmap);
> > +	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
>
> No need to save any pending interrupts? Did I miss anything?
>
> >  	if (vcpu->arch.interrupt.pending)
> >  		set_bit(vcpu->arch.interrupt.nr,
> > @@ -4037,7 +4031,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu
> > *vcpu, struct kvm_sregs *sregs)
> >  {
> >  	int mmu_reset_needed = 0;
> > -	int i, pending_vec, max_bits;
> > +	int pending_vec, max_bits;
> >  	struct descriptor_table dt;
> >
> >  	vcpu_load(vcpu);
> > @@ -4079,24 +4073,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu
> > *vcpu, if (mmu_reset_needed)
> >  		kvm_mmu_reset_context(vcpu);
> >
> > -	if (!irqchip_in_kernel(vcpu->kvm)) {
> > -		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
> > -		       sizeof vcpu->arch.irq_pending);
> > -		vcpu->arch.irq_summary = 0;
> > -		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
> > -			if (vcpu->arch.irq_pending[i])
> > -				__set_bit(i, &vcpu->arch.irq_summary);
> > -	} else {
> > -		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
> > -		pending_vec = find_first_bit(
> > -			(const unsigned long *)sregs->interrupt_bitmap,
> > -			max_bits);
> > -		/* Only pending external irq is handled here */
> > -		if (pending_vec < max_bits) {
> > -			kvm_queue_interrupt(vcpu, pending_vec);
> > -			pr_debug("Set back pending irq %d\n", pending_vec);
> > -		}
> > -		kvm_pic_clear_isr_ack(vcpu->kvm);
> > +	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
>
> If interrupt_bitmap is always zero as above, why do we still have
> this... for compatibility?



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/9] [VMX] Do not re-execute INTn instruction.
  2009-05-05  8:14 ` [PATCH 5/9] [VMX] Do not re-execute INTn instruction Gleb Natapov
@ 2009-05-06  6:57   ` Sheng Yang
  2009-05-06  9:27     ` Gleb Natapov
  2009-05-06 10:59   ` Gregory Haskins
  2009-05-06 11:46   ` Gleb Natapov
  2 siblings, 1 reply; 21+ messages in thread
From: Sheng Yang @ 2009-05-06  6:57 UTC (permalink / raw)
  To: kvm; +Cc: Gleb Natapov, avi

On Tuesday 05 May 2009 16:14:31 Gleb Natapov wrote:
> Re-inject the event instead. This is what Intel suggests. Also use the
> correct instruction length when re-injecting a soft fault/interrupt.

Thanks for fixing this!

> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    5 ++++-
>  arch/x86/kvm/svm.c              |    6 +++---
>  arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
>  arch/x86/kvm/x86.c              |   13 ++++++++-----
>  arch/x86/kvm/x86.h              |    9 ++++++++-
>  5 files changed, 45 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h index cc892f5..fea0429 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
>  	struct kvm_pio_request pio;
>  	void *pio_data;
>
> +	u8 event_exit_inst_len;
> +

Can we simply read the field when we need it, instead of adding a new
field?

>  	struct kvm_queued_exception {
>  		bool pending;
>  		bool has_error_code;
> @@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
>
>  	struct kvm_queued_interrupt {
>  		bool pending;
> +		bool soft;
>  		u8 nr;
>  	} interrupt;
>
> @@ -510,7 +513,7 @@ struct kvm_x86_ops {
>  	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>  	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
>  				unsigned char *hypercall_addr);
> -	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
> +	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);

It seems all the current interrupt info now travels with struct kvm_vcpu,
so how about taking only vcpu as the parameter? Or passing a separate
struct kvm_queued_interrupt pointer in addition to vcpu?

>  	void (*set_nmi)(struct kvm_vcpu *vcpu);
>  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
>  				bool has_error_code, u32 error_code);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 14cdfce..d5173a2 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2284,7 +2284,7 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu,
> unsigned nr) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
>  }
>
> -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
> +static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>
> @@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> *svm) case SVM_EXITINTINFO_TYPE_EXEPT:
> >  		/* In case of a software exception do not reinject the exception
> >  		   vector, but re-execute the instruction instead */
> -		if (vector == BP_VECTOR || vector == OF_VECTOR)
> +		if (kvm_exception_is_soft(vector))
>  			break;
>  		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
>  			u32 err = svm->vmcb->control.exit_int_info_err;
> @@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> *svm) kvm_queue_exception(&svm->vcpu, vector);
>  		break;
>  	case SVM_EXITINTINFO_TYPE_INTR:
> -		kvm_queue_interrupt(&svm->vcpu, vector);
> +		kvm_queue_interrupt(&svm->vcpu, vector, false);
>  		break;
>  	default:
>  		break;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a9b30e6..092a3ee 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu,
> unsigned nr, return;
>  	}
>
> -	if (nr == BP_VECTOR || nr == OF_VECTOR) {
> -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
> +	if (kvm_exception_is_soft(nr)) {

I noticed that DB_VECTOR was added as a soft one, but I don't see the
related chapter in the spec?

-- 
regards
Yang, Sheng

> +		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> +			     vmx->vcpu.arch.event_exit_inst_len);
>  		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
>  	} else
>  		intr_info |= INTR_TYPE_HARD_EXCEPTION;
> @@ -2429,9 +2430,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>  }
>
> -static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
> +static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	uint32_t intr;
>
>  	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
>
> @@ -2446,8 +2448,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu,
> int irq) kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
>  		return;
>  	}
> -	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> -			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
> +	intr = irq | INTR_INFO_VALID_MASK;
> +	if (soft) {
> +		intr |= INTR_TYPE_SOFT_INTR;
> +		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> +			     vmx->vcpu.arch.event_exit_inst_len);
> +	} else
> +		intr |= INTR_TYPE_EXT_INTR;
> +	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
>  }
>
>  static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
> @@ -3008,6 +3016,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu,
> struct kvm_run *kvm_run) GUEST_INTR_STATE_NMI);
>  			break;
>  		case INTR_TYPE_EXT_INTR:
> +		case INTR_TYPE_SOFT_INTR:
>  			kvm_clear_interrupt_queue(vcpu);
>  			break;
>  		case INTR_TYPE_HARD_EXCEPTION:
> @@ -3279,16 +3288,22 @@ static void vmx_complete_interrupts(struct vcpu_vmx
> *vmx) vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
>  				GUEST_INTR_STATE_NMI);
>  		break;
> -	case INTR_TYPE_HARD_EXCEPTION:
>  	case INTR_TYPE_SOFT_EXCEPTION:
> +		vmx->vcpu.arch.event_exit_inst_len =
> +			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
> +	case INTR_TYPE_HARD_EXCEPTION:
>  		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
>  			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
>  			kvm_queue_exception_e(&vmx->vcpu, vector, err);
>  		} else
>  			kvm_queue_exception(&vmx->vcpu, vector);
>  		break;
> +	case INTR_TYPE_SOFT_INTR:
> +		vmx->vcpu.arch.event_exit_inst_len =
> +			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>  	case INTR_TYPE_EXT_INTR:
> -		kvm_queue_interrupt(&vmx->vcpu, vector);
> +		kvm_queue_interrupt(&vmx->vcpu, vector,
> +			type == INTR_TYPE_SOFT_INTR);
>  		break;
>  	default:
>  		break;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4596927..023842b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1424,7 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu
> *vcpu, return -ENXIO;
>  	vcpu_load(vcpu);
>
> -	kvm_queue_interrupt(vcpu, irq->irq);
> +	kvm_queue_interrupt(vcpu, irq->irq, false);
>
>  	vcpu_put(vcpu);
>
> @@ -3136,7 +3136,8 @@ static void inject_irq(struct kvm_vcpu *vcpu)
>  	}
>
>  	if (vcpu->arch.interrupt.pending) {
> -		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
> +		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
> +				     vcpu->arch.interrupt.soft);
>  		return;
>  	}
>
> @@ -3149,8 +3150,10 @@ static void inject_irq(struct kvm_vcpu *vcpu)
>  		}
>  	} else if (kvm_cpu_has_interrupt(vcpu)) {
>  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
> -			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
> +			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> +					    false);
> +			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
> +					     false);
>  		}
>  	}
>  }
> @@ -4077,7 +4080,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu
> *vcpu, pending_vec = find_first_bit(
>  		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
>  	if (pending_vec < max_bits) {
> -		kvm_queue_interrupt(vcpu, pending_vec);
> +		kvm_queue_interrupt(vcpu, pending_vec, false);
>  		pr_debug("Set back pending irq %d\n", pending_vec);
>  		if (irqchip_in_kernel(vcpu->kvm))
>  			kvm_pic_clear_isr_ack(vcpu->kvm);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index c1f1a8c..2817c86 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct
> kvm_vcpu *vcpu) vcpu->arch.exception.pending = false;
>  }
>
> -static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
> +static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
> +	bool soft)
>  {
>  	vcpu->arch.interrupt.pending = true;
> +	vcpu->arch.interrupt.soft = soft;
>  	vcpu->arch.interrupt.nr = vector;
>  }
>
> @@ -24,4 +26,9 @@ static inline bool kvm_event_needs_reinjection(struct
> kvm_vcpu *vcpu) return vcpu->arch.exception.pending ||
> vcpu->arch.interrupt.pending || vcpu->arch.nmi_injected;
>  }
> +
> +static inline bool kvm_exception_is_soft(unsigned int nr)
> +{
> +	return (nr == BP_VECTOR) || (nr == OF_VECTOR) || (nr == DB_VECTOR);
> +}
>  #endif




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/9] [VMX] Do not re-execute INTn instruction.
  2009-05-06  6:57   ` Sheng Yang
@ 2009-05-06  9:27     ` Gleb Natapov
  2009-05-06  9:30       ` Avi Kivity
  0 siblings, 1 reply; 21+ messages in thread
From: Gleb Natapov @ 2009-05-06  9:27 UTC (permalink / raw)
  To: Sheng Yang; +Cc: kvm, avi

On Wed, May 06, 2009 at 02:57:10PM +0800, Sheng Yang wrote:
> On Tuesday 05 May 2009 16:14:31 Gleb Natapov wrote:
> > Re-inject the event instead. This is what Intel suggests. Also use the
> > correct instruction length when re-injecting a soft fault/interrupt.
> 
> Thanks for fixing this!
> 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |    5 ++++-
> >  arch/x86/kvm/svm.c              |    6 +++---
> >  arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
> >  arch/x86/kvm/x86.c              |   13 ++++++++-----
> >  arch/x86/kvm/x86.h              |    9 ++++++++-
> >  5 files changed, 45 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index cc892f5..fea0429 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
> >  	struct kvm_pio_request pio;
> >  	void *pio_data;
> >
> > +	u8 event_exit_inst_len;
> > +
> 
> Can we simply read the field when we need it, instead of adding a new
> field?
> 
Usually, relying on VM exit information to still be valid at VM entry
is wrong, because a migration can happen in the meantime. In this
particular case it is less obvious, since we don't want to migrate a
pending soft interrupt but re-execute the instruction instead (we could
migrate it in theory, and maybe we should, but when migrating from AMD
to Intel we don't have this info anyway). Another case where the
instruction length read from the VMCS may be outdated at injection time
is when an exception happened during interrupt delivery and that
exception must be re-injected first.
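
To make the ordering concrete, here is a minimal sketch of the
latch-at-exit / consume-at-injection pattern (field and VMCS names
follow the patch, but these helpers are illustrative only, not the
actual code):

static void latch_event_inst_len(struct vcpu_vmx *vmx)
{
	/* Read right after VM exit, while the exit field still
	 * describes the interrupted soft interrupt/exception. */
	vmx->vcpu.arch.event_exit_inst_len =
		vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
}

static void inject_soft_event(struct vcpu_vmx *vmx, u32 intr)
{
	/* At VM entry, possibly much later, consume the cached value
	 * instead of re-reading the now-stale exit field. */
	vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
		     vmx->vcpu.arch.event_exit_inst_len);
	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
}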

> >  	struct kvm_queued_exception {
> >  		bool pending;
> >  		bool has_error_code;
> > @@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
> >
> >  	struct kvm_queued_interrupt {
> >  		bool pending;
> > +		bool soft;
> >  		u8 nr;
> >  	} interrupt;
> >
> > @@ -510,7 +513,7 @@ struct kvm_x86_ops {
> >  	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> >  	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
> >  				unsigned char *hypercall_addr);
> > -	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
> > +	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);
> 
> It seems all the current interrupt info now travels with struct kvm_vcpu,
> so how about taking only vcpu as the parameter? Or passing a separate
> struct kvm_queued_interrupt pointer in addition to vcpu?
> 
I don't have any particular feelings one way or the other. Let's make it
kvm_vcpu only.
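
Concretely, the agreed-upon interface would look roughly like this (a
sketch; the real version appears in the updated patch later in this
thread):

static void svm_set_irq(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	nested_svm_intr(svm);

	/* The vector (and, where needed, the soft flag) now come from
	 * the queued interrupt state in vcpu->arch.interrupt instead
	 * of being passed as parameters. */
	svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
}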

> >  	void (*set_nmi)(struct kvm_vcpu *vcpu);
> >  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
> >  				bool has_error_code, u32 error_code);
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 14cdfce..d5173a2 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -2284,7 +2284,7 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu,
> > unsigned nr) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
> >  }
> >
> > -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
> > +static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
> >  {
> >  	struct vcpu_svm *svm = to_svm(vcpu);
> >
> > @@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> > *svm) case SVM_EXITINTINFO_TYPE_EXEPT:
> >  		/* In case of a software exception do not reinject the exception
> >  		   vector, but re-execute the instruction instead */
> > -		if (vector == BP_VECTOR || vector == OF_VECTOR)
> > +		if (kvm_exception_is_soft(vector))
> >  			break;
> >  		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
> >  			u32 err = svm->vmcb->control.exit_int_info_err;
> > @@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> > *svm) kvm_queue_exception(&svm->vcpu, vector);
> >  		break;
> >  	case SVM_EXITINTINFO_TYPE_INTR:
> > -		kvm_queue_interrupt(&svm->vcpu, vector);
> > +		kvm_queue_interrupt(&svm->vcpu, vector, false);
> >  		break;
> >  	default:
> >  		break;
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index a9b30e6..092a3ee 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu,
> > unsigned nr, return;
> >  	}
> >
> > -	if (nr == BP_VECTOR || nr == OF_VECTOR) {
> > -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
> > +	if (kvm_exception_is_soft(nr)) {
> 
> I noticed that DB_VECTOR was added as a soft one, but I don't see the
> related chapter in the spec?
> 
I was confused by the AMD spec, which defines the int1 instruction
(ICEBP), but now I see that this instruction is not intercepted as a #DB
exception. Will fix.

--
			Gleb.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/9] [VMX] Do not re-execute INTn instruction.
  2009-05-06  9:27     ` Gleb Natapov
@ 2009-05-06  9:30       ` Avi Kivity
  0 siblings, 0 replies; 21+ messages in thread
From: Avi Kivity @ 2009-05-06  9:30 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Sheng Yang, kvm

Gleb Natapov wrote:
>>> +	u8 event_exit_inst_len;
>>> +
>>>       
>> Can we simply read the field when we need it, instead of adding a new
>> field?
>>
>>     
> Usually, relying on VM exit information to still be valid at VM entry
> is wrong, because a migration can happen in the meantime. In this
> particular case it is less obvious, since we don't want to migrate a
> pending soft interrupt but re-execute the instruction instead (we could
> migrate it in theory, and maybe we should, but when migrating from AMD
> to Intel we don't have this info anyway). Another case where the
> instruction length read from the VMCS may be outdated at injection time
> is when an exception happened during interrupt delivery and that
> exception must be re-injected first.
>   

Note that in some cases we do keep things in vmcs/vmcb fields -- the 
registers, segments, etc.  This is because we have per-vendor accessors 
for them, so we maintain a "virtual data structure" that common code can 
access.

We could do something similar with the interrupt queue -- keep part of it
in the vmcs/vmcb and use accessors to modify it.  But I don't think it's
worthwhile: for vmx we have to read and write the fields in any case
(since, unlike the registers, the exit and entry fields are different),
and for svm the vmcb is plain memory, so reading and writing it back is
very cheap.
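
As a rough illustration of the two options (the segment accessors below
resemble the real kvm_x86_ops; the interrupt-queue accessors are purely
hypothetical):

/* Existing pattern: common code sees a "virtual" segment structure,
 * and each vendor backend maps it onto the VMCS or the VMCB. */
void (*get_segment)(struct kvm_vcpu *vcpu,
		    struct kvm_segment *var, int seg);
void (*set_segment)(struct kvm_vcpu *vcpu,
		    struct kvm_segment *var, int seg);

/* Hypothetical equivalent for the interrupt queue, which the text
 * above argues is not worthwhile: vmx would need a VMCS read plus a
 * VMCS write either way (the exit and entry fields differ), and the
 * svm vmcb is plain memory, so caching in kvm_vcpu_arch is already
 * cheap. */
void (*get_queued_irq)(struct kvm_vcpu *vcpu,
		       struct kvm_queued_interrupt *irq);
void (*set_queued_irq)(struct kvm_vcpu *vcpu,
		       struct kvm_queued_interrupt *irq);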

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/9] [VMX] Do not re-execute INTn instruction.
  2009-05-05  8:14 ` [PATCH 5/9] [VMX] Do not re-execute INTn instruction Gleb Natapov
  2009-05-06  6:57   ` Sheng Yang
@ 2009-05-06 10:59   ` Gregory Haskins
  2009-05-06 11:46   ` Gleb Natapov
  2 siblings, 0 replies; 21+ messages in thread
From: Gregory Haskins @ 2009-05-06 10:59 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm

Hi Gleb,

Gleb Natapov wrote:
> Re-inject the event instead. This is what Intel suggests. Also use the
> correct instruction length when re-injecting a soft fault/interrupt.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    5 ++++-
>  arch/x86/kvm/svm.c              |    6 +++---
>  arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
>  arch/x86/kvm/x86.c              |   13 ++++++++-----
>  arch/x86/kvm/x86.h              |    9 ++++++++-
>  5 files changed, 45 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index cc892f5..fea0429 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
>  	struct kvm_pio_request pio;
>  	void *pio_data;
>  
> +	u8 event_exit_inst_len;
> +
>  	struct kvm_queued_exception {
>  		bool pending;
>  		bool has_error_code;
> @@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
>  
>  	struct kvm_queued_interrupt {
>  		bool pending;
> +		bool soft;
>  		u8 nr;
>  	} interrupt;
>  
> @@ -510,7 +513,7 @@ struct kvm_x86_ops {
>  	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>  	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
>  				unsigned char *hypercall_addr);
> -	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
> +	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);
>  	void (*set_nmi)(struct kvm_vcpu *vcpu);
>  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
>  				bool has_error_code, u32 error_code);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 14cdfce..d5173a2 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2284,7 +2284,7 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
>  		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
>  }
>  
> -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
> +static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> @@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  	case SVM_EXITINTINFO_TYPE_EXEPT:
>  		/* In case of a software exception do not reinject the exception
>  		   vector, but re-execute the instruction instead */
> -		if (vector == BP_VECTOR || vector == OF_VECTOR)
> +		if (kvm_exception_is_soft(vector))
>  			break;
>  		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
>  			u32 err = svm->vmcb->control.exit_int_info_err;
> @@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  			kvm_queue_exception(&svm->vcpu, vector);
>  		break;
>  	case SVM_EXITINTINFO_TYPE_INTR:
> -		kvm_queue_interrupt(&svm->vcpu, vector);
> +		kvm_queue_interrupt(&svm->vcpu, vector, false);
>  		break;
>  	default:
>  		break;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a9b30e6..092a3ee 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
>  		return;
>  	}
>  
> -	if (nr == BP_VECTOR || nr == OF_VECTOR) {
> -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
> +	if (kvm_exception_is_soft(nr)) {
> +		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> +			     vmx->vcpu.arch.event_exit_inst_len);
>  		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
>  	} else
>  		intr_info |= INTR_TYPE_HARD_EXCEPTION;
> @@ -2429,9 +2430,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>  }
>  
> -static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
> +static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	uint32_t intr;
>  
>  	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
>  
> @@ -2446,8 +2448,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
>  		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
>  		return;
>  	}
> -	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> -			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
> +	intr = irq | INTR_INFO_VALID_MASK;
> +	if (soft) {
> +		intr |= INTR_TYPE_SOFT_INTR;
> +		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> +			     vmx->vcpu.arch.event_exit_inst_len);
> +	} else
> +		intr |= INTR_TYPE_EXT_INTR;
> +	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
>  }
>  
>  static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
> @@ -3008,6 +3016,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  					      GUEST_INTR_STATE_NMI);
>  			break;
>  		case INTR_TYPE_EXT_INTR:
> +		case INTR_TYPE_SOFT_INTR:
>  			kvm_clear_interrupt_queue(vcpu);
>  			break;
>  		case INTR_TYPE_HARD_EXCEPTION:
> @@ -3279,16 +3288,22 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
>  		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
>  				GUEST_INTR_STATE_NMI);
>  		break;
> -	case INTR_TYPE_HARD_EXCEPTION:
>  	case INTR_TYPE_SOFT_EXCEPTION:
> +		vmx->vcpu.arch.event_exit_inst_len =
> +			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>   

Minor nit: I would suggest a "/* fall through */" comment here to denote
that the break was intentionally omitted (assuming it was, but I think so
IIUC).

> +	case INTR_TYPE_HARD_EXCEPTION:
>  		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
>  			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
>  			kvm_queue_exception_e(&vmx->vcpu, vector, err);
>  		} else
>  			kvm_queue_exception(&vmx->vcpu, vector);
>  		break;
> +	case INTR_TYPE_SOFT_INTR:
> +		vmx->vcpu.arch.event_exit_inst_len =
> +			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>  	case INTR_TYPE_EXT_INTR:
> -		kvm_queue_interrupt(&vmx->vcpu, vector);
> +		kvm_queue_interrupt(&vmx->vcpu, vector,
> +			type == INTR_TYPE_SOFT_INTR);
>  		break;
>  	default:
>  		break;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4596927..023842b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1424,7 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
>  		return -ENXIO;
>  	vcpu_load(vcpu);
>  
> -	kvm_queue_interrupt(vcpu, irq->irq);
> +	kvm_queue_interrupt(vcpu, irq->irq, false);
>  
>  	vcpu_put(vcpu);
>  
> @@ -3136,7 +3136,8 @@ static void inject_irq(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (vcpu->arch.interrupt.pending) {
> -		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
> +		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
> +				     vcpu->arch.interrupt.soft);
>  		return;
>  	}
>  
> @@ -3149,8 +3150,10 @@ static void inject_irq(struct kvm_vcpu *vcpu)
>  		}
>  	} else if (kvm_cpu_has_interrupt(vcpu)) {
>  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
> -			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
> +			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> +					    false);
> +			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
> +					     false);
>  		}
>  	}
>  }
> @@ -4077,7 +4080,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
>  	pending_vec = find_first_bit(
>  		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
>  	if (pending_vec < max_bits) {
> -		kvm_queue_interrupt(vcpu, pending_vec);
> +		kvm_queue_interrupt(vcpu, pending_vec, false);
>  		pr_debug("Set back pending irq %d\n", pending_vec);
>  		if (irqchip_in_kernel(vcpu->kvm))
>  			kvm_pic_clear_isr_ack(vcpu->kvm);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index c1f1a8c..2817c86 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
>  	vcpu->arch.exception.pending = false;
>  }
>  
> -static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
> +static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
> +	bool soft)
>  {
>  	vcpu->arch.interrupt.pending = true;
> +	vcpu->arch.interrupt.soft = soft;
>  	vcpu->arch.interrupt.nr = vector;
>  }
>  
> @@ -24,4 +26,9 @@ static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
>  	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
>  		vcpu->arch.nmi_injected;
>  }
> +
> +static inline bool kvm_exception_is_soft(unsigned int nr)
> +{
> +	return (nr == BP_VECTOR) || (nr == OF_VECTOR) || (nr == DB_VECTOR);
> +}
>  #endif
>   




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/9] [VMX] Do not re-execute INTn instruction.
  2009-05-05  8:14 ` [PATCH 5/9] [VMX] Do not re-execute INTn instruction Gleb Natapov
  2009-05-06  6:57   ` Sheng Yang
  2009-05-06 10:59   ` Gregory Haskins
@ 2009-05-06 11:46   ` Gleb Natapov
  2 siblings, 0 replies; 21+ messages in thread
From: Gleb Natapov @ 2009-05-06 11:46 UTC (permalink / raw)
  To: avi; +Cc: kvm

Here is an updated patch that hopefully addresses all the comments.

Re-inject the event instead. This is what Intel suggests. Also use the
correct instruction length when re-injecting a soft fault/interrupt.
 
Signed-off-by: Gleb Natapov <gleb@redhat.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cc892f5..c5fac32 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
 	struct kvm_pio_request pio;
 	void *pio_data;
 
+	u8 event_exit_inst_len;
+
 	struct kvm_queued_exception {
 		bool pending;
 		bool has_error_code;
@@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
 
 	struct kvm_queued_interrupt {
 		bool pending;
+		bool soft;
 		u8 nr;
 	} interrupt;
 
@@ -510,7 +513,7 @@ struct kvm_x86_ops {
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
-	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_irq)(struct kvm_vcpu *vcpu);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 14cdfce..3cc843e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2284,13 +2284,13 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_set_irq(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	nested_svm_intr(svm);
 
-	svm_queue_irq(vcpu, irq);
+	svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	case SVM_EXITINTINFO_TYPE_EXEPT:
		/* In case of a software exception do not reinject the exception
		   vector, but re-execute the instruction instead */
-		if (vector == BP_VECTOR || vector == OF_VECTOR)
+		if (kvm_exception_is_soft(vector))
 			break;
 		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
 			u32 err = svm->vmcb->control.exit_int_info_err;
@@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 			kvm_queue_exception(&svm->vcpu, vector);
 		break;
 	case SVM_EXITINTINFO_TYPE_INTR:
-		kvm_queue_interrupt(&svm->vcpu, vector);
+		kvm_queue_interrupt(&svm->vcpu, vector, false);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a9b30e6..211a787 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		return;
 	}
 
-	if (nr == BP_VECTOR || nr == OF_VECTOR) {
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+	if (kvm_exception_is_soft(nr)) {
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
 		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
 	} else
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -2429,9 +2430,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
+static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	uint32_t intr;
+	int irq = vcpu->arch.interrupt.nr;
 
 	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
@@ -2446,8 +2449,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
 		return;
 	}
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+	intr = irq | INTR_INFO_VALID_MASK;
+	if (vcpu->arch.interrupt.soft) {
+		intr |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
+	} else
+		intr |= INTR_TYPE_EXT_INTR;
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -3008,6 +3017,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 					      GUEST_INTR_STATE_NMI);
 			break;
 		case INTR_TYPE_EXT_INTR:
+		case INTR_TYPE_SOFT_INTR:
 			kvm_clear_interrupt_queue(vcpu);
 			break;
 		case INTR_TYPE_HARD_EXCEPTION:
@@ -3279,16 +3289,24 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_NMI);
 		break;
-	case INTR_TYPE_HARD_EXCEPTION:
 	case INTR_TYPE_SOFT_EXCEPTION:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+		/* fall through */
+	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
 			kvm_queue_exception_e(&vmx->vcpu, vector, err);
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
 		break;
+	case INTR_TYPE_SOFT_INTR:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+		/* fall through */
 	case INTR_TYPE_EXT_INTR:
-		kvm_queue_interrupt(&vmx->vcpu, vector);
+		kvm_queue_interrupt(&vmx->vcpu, vector,
+			type == INTR_TYPE_SOFT_INTR);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4596927..73d375a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1424,7 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 		return -ENXIO;
 	vcpu_load(vcpu);
 
-	kvm_queue_interrupt(vcpu, irq->irq);
+	kvm_queue_interrupt(vcpu, irq->irq, false);
 
 	vcpu_put(vcpu);
 
@@ -3136,7 +3136,7 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 	}
 
 	if (vcpu->arch.interrupt.pending) {
-		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		kvm_x86_ops->set_irq(vcpu);
 		return;
 	}
 
@@ -3149,8 +3149,9 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 		}
 	} else if (kvm_cpu_has_interrupt(vcpu)) {
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+					    false);
+			kvm_x86_ops->set_irq(vcpu);
 		}
 	}
 }
@@ -4077,7 +4078,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	pending_vec = find_first_bit(
 		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
 	if (pending_vec < max_bits) {
-		kvm_queue_interrupt(vcpu, pending_vec);
+		kvm_queue_interrupt(vcpu, pending_vec, false);
 		pr_debug("Set back pending irq %d\n", pending_vec);
 		if (irqchip_in_kernel(vcpu->kvm))
 			kvm_pic_clear_isr_ack(vcpu->kvm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c1f1a8c..4c8e10a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 	vcpu->arch.exception.pending = false;
 }
 
-static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
+static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
+	bool soft)
 {
 	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.soft = soft;
 	vcpu->arch.interrupt.nr = vector;
 }
 
@@ -24,4 +26,9 @@ static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
 		vcpu->arch.nmi_injected;
 }
+
+static inline bool kvm_exception_is_soft(unsigned int nr)
+{
+	return (nr == BP_VECTOR) || (nr == OF_VECTOR);
+}
 #endif
--
			Gleb.

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2009-05-06 11:47 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-05  8:14 [PATCH 1/9] Unprotect a page if #PF happens during NMI injection Gleb Natapov
2009-05-05  8:14 ` [PATCH 2/9] Do not allow interrupt injection from userspace if there is a pending event Gleb Natapov
2009-05-05  8:14 ` [PATCH 3/9] Remove irq_pending bitmap Gleb Natapov
2009-05-06  5:55   ` Sheng Yang
2009-05-06  6:50     ` Sheng Yang
2009-05-05  8:14 ` [PATCH 4/9] [SVM] skip_emulated_instruction() decode an instruction if size is not known Gleb Natapov
2009-05-05  8:14 ` [PATCH 5/9] [VMX] Do not re-execute INTn instruction Gleb Natapov
2009-05-06  6:57   ` Sheng Yang
2009-05-06  9:27     ` Gleb Natapov
2009-05-06  9:30       ` Avi Kivity
2009-05-06 10:59   ` Gregory Haskins
2009-05-06 11:46   ` Gleb Natapov
2009-05-05  8:14 ` [PATCH 6/9] IRQ/NMI window should always be requested Gleb Natapov
2009-05-05  8:14 ` [PATCH 7/9] Drop interrupt shadow when single stepping should be done only on VMX Gleb Natapov
2009-05-05  8:14 ` [PATCH 8/9] Replace pending exception by PF if it happens serially Gleb Natapov
2009-05-05  8:14 ` [PATCH 9/9] [SVM] inject NMI after IRET from a previous NMI, not before Gleb Natapov
2009-05-05  8:45   ` Jan Kiszka
2009-05-05  9:03     ` Gleb Natapov
2009-05-05  9:25       ` Jan Kiszka
2009-05-05  9:32         ` Gleb Natapov
2009-05-05  9:47   ` Gleb Natapov
