All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv2 0/5] improve speed of "rep ins" emulation
@ 2012-06-12 12:01 Gleb Natapov
  2012-06-12 12:01 ` [PATCHv2 1/5] Provide userspace IO exit completion callback Gleb Natapov
                   ` (4 more replies)
  0 siblings, 5 replies; 25+ messages in thread
From: Gleb Natapov @ 2012-06-12 12:01 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

With these patches, loading a 100M initrd takes ~10s instead of ~40s without.

Changelog:
 v1->v2
   - add segment check and mask rcx/rdi correctly during increment.

Gleb Natapov (5):
  Provide userspace IO exit completion callback.
  KVM: emulator: make x86 emulation modes enum instead of defines
  KVM: emulator: move some address manipulation function out of
    emulator code.
  KVM: emulator: move linearize() out of emulator code.
  KVM: Provide fast path for "rep ins" emulation if possible.

 arch/x86/include/asm/kvm_emulate.h |   36 +++--
 arch/x86/include/asm/kvm_host.h    |   32 ++++
 arch/x86/kvm/emulate.c             |  103 ++-----------
 arch/x86/kvm/svm.c                 |   20 ++-
 arch/x86/kvm/vmx.c                 |   25 ++-
 arch/x86/kvm/x86.c                 |  317 ++++++++++++++++++++++++++++++------
 6 files changed, 364 insertions(+), 169 deletions(-)

-- 
1.7.7.3


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCHv2 1/5] Provide userspace IO exit completion callback.
  2012-06-12 12:01 [PATCHv2 0/5] improve speed of "rep ins" emulation Gleb Natapov
@ 2012-06-12 12:01 ` Gleb Natapov
  2012-06-29  0:51   ` Marcelo Tosatti
  2012-06-12 12:01 ` [PATCHv2 2/5] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-12 12:01 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

Current code assumes that IO exit was due to instruction emulation
and hands execution back to the emulator directly. This patch adds a new
userspace IO exit completion callback that can be set by any other code
that caused an IO exit to userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/x86.c              |   92 +++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2..1a1bba6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -406,6 +406,7 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
+	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a01a424..6fa0e21 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4547,6 +4547,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	return true;
 }
 
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
+static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4617,13 +4620,16 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
-		else
+		else {
 			writeback = false;
+			vcpu->arch.complete_userspace_io = complete_emulated_pio;
+		}
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
 		if (!vcpu->mmio_is_write)
 			writeback = false;
 		r = EMULATE_DO_MMIO;
+		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
@@ -5474,6 +5480,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+	int r;
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!vcpu->arch.pio.count);
+
+	return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
  *
@@ -5490,47 +5514,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
  *      copy data
  *      exit
  */
-static int complete_mmio(struct kvm_vcpu *vcpu)
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_mmio_fragment *frag;
-	int r;
 
-	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
-		return 1;
+	BUG_ON(!vcpu->mmio_needed);
 
-	if (vcpu->mmio_needed) {
-		/* Complete previous fragment */
-		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-		if (!vcpu->mmio_is_write)
-			memcpy(frag->data, run->mmio.data, frag->len);
-		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
-			vcpu->mmio_needed = 0;
-			if (vcpu->mmio_is_write)
-				return 1;
-			vcpu->mmio_read_completed = 1;
-			goto done;
-		}
-		/* Initiate next fragment */
-		++frag;
-		run->exit_reason = KVM_EXIT_MMIO;
-		run->mmio.phys_addr = frag->gpa;
+	/* Complete previous fragment */
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+	if (!vcpu->mmio_is_write)
+		memcpy(frag->data, run->mmio.data, frag->len);
+	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
-			memcpy(run->mmio.data, frag->data, frag->len);
-		run->mmio.len = frag->len;
-		run->mmio.is_write = vcpu->mmio_is_write;
-		return 0;
-
-	}
-done:
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+			return 1;
+		vcpu->mmio_read_completed = 1;
+		return complete_emulated_io(vcpu);
+	}
+	/* Initiate next fragment */
+	++frag;
+	run->exit_reason = KVM_EXIT_MMIO;
+	run->mmio.phys_addr = frag->gpa;
+	if (vcpu->mmio_is_write)
+		memcpy(run->mmio.data, frag->data, frag->len);
+	run->mmio.len = frag->len;
+	run->mmio.is_write = vcpu->mmio_is_write;
+	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+	return 0;
 }
 
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5557,9 +5571,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	r = complete_mmio(vcpu);
-	if (r <= 0)
-		goto out;
+	if (unlikely(vcpu->arch.complete_userspace_io)) {
+		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+		vcpu->arch.complete_userspace_io = NULL;
+		r = cui(vcpu);
+		if (r <= 0)
+			goto out;
+	}
 
 	r = __vcpu_run(vcpu);
 
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCHv2 2/5] KVM: emulator: make x86 emulation modes enum instead of defines
  2012-06-12 12:01 [PATCHv2 0/5] improve speed of "rep ins" emulation Gleb Natapov
  2012-06-12 12:01 ` [PATCHv2 1/5] Provide userspace IO exit completion callback Gleb Natapov
@ 2012-06-12 12:01 ` Gleb Natapov
  2012-06-12 12:01 ` [PATCHv2 3/5] KVM: emulator: move some address manipulation function out of emulator code Gleb Natapov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 25+ messages in thread
From: Gleb Natapov @ 2012-06-12 12:01 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |   22 ++++++++++------------
 arch/x86/kvm/emulate.c             |    4 +++-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 1ac46c22..7c276ca 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -236,6 +236,15 @@ struct read_cache {
 	unsigned long end;
 };
 
+/* Execution mode, passed to the emulator. */
+enum x86emul_mode {
+	X86EMUL_MODE_REAL,	/* Real mode.             */
+	X86EMUL_MODE_VM86,	/* Virtual 8086 mode.     */
+	X86EMUL_MODE_PROT16,	/* 16-bit protected mode. */
+	X86EMUL_MODE_PROT32,	/* 32-bit protected mode. */
+	X86EMUL_MODE_PROT64,	/* 64-bit (long) mode.    */
+};
+
 struct x86_emulate_ctxt {
 	struct x86_emulate_ops *ops;
 
@@ -243,7 +252,7 @@ struct x86_emulate_ctxt {
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
-	int mode;
+	enum x86emul_mode mode;
 
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
@@ -293,17 +302,6 @@ struct x86_emulate_ctxt {
 #define REPE_PREFIX	0xf3
 #define REPNE_PREFIX	0xf2
 
-/* Execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL     0	/* Real mode.             */
-#define X86EMUL_MODE_VM86     1	/* Virtual 8086 mode.     */
-#define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
-#define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
-#define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
-
-/* any protected mode   */
-#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
-			       X86EMUL_MODE_PROT64)
-
 /* CPUID vendors */
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f95d242..79899df 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2141,6 +2141,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 		if (msr_data == 0x0)
 			return emulate_gp(ctxt, 0);
 		break;
+	default:
+		break;
 	}
 
 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
@@ -4179,7 +4181,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	}
 
 	/* Instruction can only be executed in protected mode */
-	if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
+	if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCHv2 3/5] KVM: emulator: move some address manipulation function out of emulator code.
  2012-06-12 12:01 [PATCHv2 0/5] improve speed of "rep ins" emulation Gleb Natapov
  2012-06-12 12:01 ` [PATCHv2 1/5] Provide userspace IO exit completion callback Gleb Natapov
  2012-06-12 12:01 ` [PATCHv2 2/5] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
@ 2012-06-12 12:01 ` Gleb Natapov
  2012-06-12 12:01 ` [PATCHv2 4/5] KVM: emulator: move linearize() " Gleb Natapov
  2012-06-12 12:01 ` [PATCHv2 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
  4 siblings, 0 replies; 25+ messages in thread
From: Gleb Natapov @ 2012-06-12 12:01 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

The functions will be used outside of the emulator.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   25 +++++++++++++++++++++++++
 arch/x86/kvm/emulate.c          |   15 ++-------------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1a1bba6..0f52aae 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -968,4 +968,29 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
+static inline unsigned long kvm_ad_mask(u8 ad_bytes)
+{
+	return (1UL << (ad_bytes << 3)) - 1;
+}
+
+/* Access/update address held in a register, based on addressing mode. */
+static inline unsigned long
+kvm_address_mask(u8 ad_bytes, unsigned long reg)
+{
+	if (ad_bytes == sizeof(unsigned long))
+		return reg;
+	else
+		return reg & kvm_ad_mask(ad_bytes);
+}
+
+static inline void
+kvm_register_address_increment(u8 ad_bytes, unsigned long *reg, int inc)
+{
+	if (ad_bytes == sizeof(unsigned long))
+		*reg += inc;
+	else
+		*reg = (*reg & ~kvm_ad_mask(ad_bytes)) |
+			((*reg + inc) & kvm_ad_mask(ad_bytes));
+}
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 79899df..e317588 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -433,19 +433,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
 	return ctxt->ops->intercept(ctxt, &info, stage);
 }
 
-static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
-{
-	return (1UL << (ctxt->ad_bytes << 3)) - 1;
-}
-
 /* Access/update address held in a register, based on addressing mode. */
 static inline unsigned long
 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 {
-	if (ctxt->ad_bytes == sizeof(unsigned long))
-		return reg;
-	else
-		return reg & ad_mask(ctxt);
+	return kvm_address_mask(ctxt->ad_bytes, reg);
 }
 
 static inline unsigned long
@@ -457,10 +449,7 @@ register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 static inline void
 register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
 {
-	if (ctxt->ad_bytes == sizeof(unsigned long))
-		*reg += inc;
-	else
-		*reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
+	return kvm_register_address_increment(ctxt->ad_bytes, reg, inc);
 }
 
 static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-12 12:01 [PATCHv2 0/5] improve speed of "rep ins" emulation Gleb Natapov
                   ` (2 preceding siblings ...)
  2012-06-12 12:01 ` [PATCHv2 3/5] KVM: emulator: move some address manipulation function out of emulator code Gleb Natapov
@ 2012-06-12 12:01 ` Gleb Natapov
  2012-06-24 13:12   ` Avi Kivity
  2012-06-12 12:01 ` [PATCHv2 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
  4 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-12 12:01 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

The function will be used outside of the emulator.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |   14 ++++--
 arch/x86/kvm/emulate.c             |   84 ++------------------------------
 arch/x86/kvm/x86.c                 |   92 ++++++++++++++++++++++++++++++++++--
 3 files changed, 103 insertions(+), 87 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7c276ca..bdf8a84 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -43,6 +43,11 @@ struct x86_instruction_info {
 	u64 next_rip;           /* rip following the instruction        */
 };
 
+struct segmented_address {
+	ulong ea;
+	unsigned seg;
+};
+
 /*
  * x86_emulate_ops:
  *
@@ -194,6 +199,10 @@ struct x86_emulate_ops {
 
 	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
 			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+
+	int (*linearize)(struct x86_emulate_ctxt *ctxt,
+			struct segmented_address addr, unsigned size,
+			bool write, bool fetch, ulong *linear);
 };
 
 typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -208,10 +217,7 @@ struct operand {
 	};
 	union {
 		unsigned long *reg;
-		struct segmented_address {
-			ulong ea;
-			unsigned seg;
-		} mem;
+		struct segmented_address mem;
 		unsigned xmm;
 		unsigned mm;
 	} addr;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e317588..24b1c70 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -470,14 +470,6 @@ static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
 	ctxt->seg_override = seg;
 }
 
-static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
-{
-	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
-		return 0;
-
-	return ctxt->ops->get_cached_segment_base(ctxt, seg);
-}
-
 static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
 {
 	if (!ctxt->has_seg_override)
@@ -505,11 +497,6 @@ static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
 	return emulate_exception(ctxt, GP_VECTOR, err, true);
 }
 
-static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
-{
-	return emulate_exception(ctxt, SS_VECTOR, err, true);
-}
-
 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
 {
 	return emulate_exception(ctxt, UD_VECTOR, 0, false);
@@ -578,74 +565,15 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 		     unsigned size, bool write, bool fetch,
 		     ulong *linear)
 {
-	struct desc_struct desc;
-	bool usable;
-	ulong la;
-	u32 lim;
-	u16 sel;
-	unsigned cpl, rpl;
+	int err = ctxt->ops->linearize(ctxt, addr, size, write, fetch, linear);
 
-	la = seg_base(ctxt, addr.seg) + addr.ea;
-	switch (ctxt->mode) {
-	case X86EMUL_MODE_REAL:
-		break;
-	case X86EMUL_MODE_PROT64:
-		if (((signed long)la << 16) >> 16 != la)
-			return emulate_gp(ctxt, 0);
-		break;
-	default:
-		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
-						addr.seg);
-		if (!usable)
-			goto bad;
-		/* code segment or read-only data segment */
-		if (((desc.type & 8) || !(desc.type & 2)) && write)
-			goto bad;
-		/* unreadable code segment */
-		if (!fetch && (desc.type & 8) && !(desc.type & 2))
-			goto bad;
-		lim = desc_limit_scaled(&desc);
-		if ((desc.type & 8) || !(desc.type & 4)) {
-			/* expand-up segment */
-			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
-				goto bad;
-		} else {
-			/* exapand-down segment */
-			if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
-				goto bad;
-			lim = desc.d ? 0xffffffff : 0xffff;
-			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
-				goto bad;
-		}
-		cpl = ctxt->ops->cpl(ctxt);
-		rpl = sel & 3;
-		cpl = max(cpl, rpl);
-		if (!(desc.type & 8)) {
-			/* data segment */
-			if (cpl > desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && !(desc.type & 4)) {
-			/* nonconforming code segment */
-			if (cpl != desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && (desc.type & 4)) {
-			/* conforming code segment */
-			if (cpl < desc.dpl)
-				goto bad;
-		}
-		break;
-	}
-	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
-		la &= (u32)-1;
-	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
+	if (err >= 0)
+		return emulate_exception(ctxt, err, (int)*linear, true);
+
+	if (insn_aligned(ctxt, size) && ((*linear & (size - 1)) != 0))
 		return emulate_gp(ctxt, 0);
-	*linear = la;
+
 	return X86EMUL_CONTINUE;
-bad:
-	if (addr.seg == VCPU_SREG_SS)
-		return emulate_ss(ctxt, addr.seg);
-	else
-		return emulate_gp(ctxt, addr.seg);
 }
 
 static int linearize(struct x86_emulate_ctxt *ctxt,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6fa0e21..5ce24a8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3656,6 +3656,84 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
+static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
+{
+	return kvm_x86_ops->get_segment_base(vcpu, seg);
+}
+
+static int kvm_linearize_address(struct kvm_vcpu *vcpu, enum x86emul_mode mode,
+		ulong ea, unsigned seg, unsigned size, bool write, bool fetch,
+		u8 ad_bytes, ulong *linear)
+{
+	ulong la;
+	unsigned cpl, rpl;
+	struct kvm_segment desc;
+
+	la = get_segment_base(vcpu, seg) + ea;
+	switch (mode) {
+	case X86EMUL_MODE_REAL:
+		break;
+	case X86EMUL_MODE_PROT64:
+		if (((signed long)la << 16) >> 16 != la) {
+			*linear = 0;
+			return GP_VECTOR;
+		}
+		break;
+	default:
+		kvm_get_segment(vcpu, &desc, seg);
+		if (desc.unusable)
+			goto bad;
+		/* code segment or read-only data segment */
+		if (((desc.type & 8) || !(desc.type & 2)) && write)
+			goto bad;
+		/* unreadable code segment */
+		if (!fetch && (desc.type & 8) && !(desc.type & 2))
+			goto bad;
+		if ((desc.type & 8) || !(desc.type & 4)) {
+			/* expand-up segment */
+			if (ea > desc.limit ||
+					(u32)(ea + size - 1) > desc.limit)
+				goto bad;
+		} else {
+			u32 lim;
+			/* exapand-down segment */
+			if (ea <= desc.limit ||
+					(u32)(ea + size - 1) <= desc.limit)
+				goto bad;
+			lim = desc.db ? 0xffffffff : 0xffff;
+			if (ea > lim || (u32)(ea + size - 1) > lim)
+				goto bad;
+		}
+		cpl = kvm_x86_ops->get_cpl(vcpu);
+		rpl = desc.selector & 3;
+		cpl = max(cpl, rpl);
+		if (!(desc.type & 8)) {
+			/* data segment */
+			if (cpl > desc.dpl)
+				goto bad;
+		} else if ((desc.type & 8) && !(desc.type & 4)) {
+			/* nonconforming code segment */
+			if (cpl != desc.dpl)
+				goto bad;
+		} else if ((desc.type & 8) && (desc.type & 4)) {
+			/* conforming code segment */
+			if (cpl < desc.dpl)
+				goto bad;
+		}
+		break;
+	}
+	if (fetch ? mode != X86EMUL_MODE_PROT64 : ad_bytes != 8)
+		la &= (u32)-1;
+	*linear = la;
+	return -1;
+bad:
+	*linear = (ulong)seg;
+	if (seg == VCPU_SREG_SS)
+		return SS_VECTOR;
+	else
+		return GP_VECTOR;
+}
+
 static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 				gpa_t *gpa, struct x86_exception *exception,
 				bool write)
@@ -4044,11 +4122,6 @@ static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
 	return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
 }
 
-static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
-{
-	return kvm_x86_ops->get_segment_base(vcpu, seg);
-}
-
 static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
 {
 	kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
@@ -4319,6 +4392,14 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
 	return false;
 }
 
+static int emulator_linearize_address(struct x86_emulate_ctxt *ctxt,
+		struct segmented_address addr, unsigned size,
+		bool write, bool fetch, ulong *linear)
+{
+	return kvm_linearize_address(emul_to_vcpu(ctxt), ctxt->mode, addr.ea,
+			addr.seg, size, write, fetch, ctxt->ad_bytes, linear);
+}
+
 static struct x86_emulate_ops emulate_ops = {
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
@@ -4352,6 +4433,7 @@ static struct x86_emulate_ops emulate_ops = {
 	.put_fpu             = emulator_put_fpu,
 	.intercept           = emulator_intercept,
 	.get_cpuid           = emulator_get_cpuid,
+	.linearize           = emulator_linearize_address,
 };
 
 static void cache_all_regs(struct kvm_vcpu *vcpu)
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCHv2 5/5] KVM: Provide fast path for "rep ins" emulation if possible.
  2012-06-12 12:01 [PATCHv2 0/5] improve speed of "rep ins" emulation Gleb Natapov
                   ` (3 preceding siblings ...)
  2012-06-12 12:01 ` [PATCHv2 4/5] KVM: emulator: move linearize() " Gleb Natapov
@ 2012-06-12 12:01 ` Gleb Natapov
  2012-06-29 22:26   ` Marcelo Tosatti
  4 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-12 12:01 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

"rep ins" emulation is going through emulator now. This is slow because
emulator knows how to write back only one datum at a time. This patch
provides fast path for the instruction in certain conditions. The
conditions are: DF flag is not set, destination memory is RAM and single
datum does not cross page boundary. If fast path code fails it falls
back to emulation.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    6 ++
 arch/x86/kvm/svm.c              |   20 +++++--
 arch/x86/kvm/vmx.c              |   25 +++++--
 arch/x86/kvm/x86.c              |  133 ++++++++++++++++++++++++++++++++++++--
 4 files changed, 165 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0f52aae..e139bbc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -404,6 +404,10 @@ struct kvm_vcpu_arch {
 	/* emulate context */
 
 	struct x86_emulate_ctxt emulate_ctxt;
+	struct x86_fast_string_pio_ctxt {
+		unsigned long linear_addr;
+		u8 ad_bytes;
+	} fast_string_pio_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
 	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -764,6 +768,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 struct x86_emulate_ctxt;
 
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port,
+		u8 ad_bytes_idx);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7a41878..f3e7bb3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-	int size, in, string;
+	int size, in, string, rep;
 	unsigned port;
 
 	++svm->vcpu.stat.io_exits;
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
+	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-	if (string || in)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 	svm->next_rip = svm->vmcb->control.exit_info_2;
-	skip_emulated_instruction(&svm->vcpu);
 
-	return kvm_fast_pio_out(vcpu, size, port);
+	if (!string && !in) {
+		skip_emulated_instruction(&svm->vcpu);
+		return kvm_fast_pio_out(vcpu, size, port);
+	} else if (string && in && rep) {
+		int addr_size = (io_info & SVM_IOIO_ASIZE_MASK) >>
+			SVM_IOIO_ASIZE_SHIFT;
+		int r = kvm_fast_string_pio_in(vcpu, size, port,
+				ffs(addr_size) - 1);
+		if (r != EMULATE_FAIL)
+			return r == EMULATE_DONE;
+	}
+
+	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index eeeb4a2..dc06630 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -639,6 +639,7 @@ static unsigned long *vmx_msr_bitmap_longmode;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
+static bool cpu_has_ins_outs_inst_info;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2522,6 +2523,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (((vmx_msr_high >> 18) & 15) != 6)
 		return -EIO;
 
+	cpu_has_ins_outs_inst_info = vmx_msr_high & (1u << 22);
+
 	vmcs_conf->size = vmx_msr_high & 0x1fff;
 	vmcs_conf->order = get_order(vmcs_config.size);
 	vmcs_conf->revision_id = vmx_msr_low;
@@ -4393,23 +4396,31 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
 static int handle_io(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int size, in, string;
+	int size, in, string, rep;
 	unsigned port;
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	string = (exit_qualification & 16) != 0;
 	in = (exit_qualification & 8) != 0;
+	string = (exit_qualification & 16) != 0;
+	rep = (exit_qualification & 32) != 0;
 
 	++vcpu->stat.io_exits;
 
-	if (string || in)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
-
 	port = exit_qualification >> 16;
 	size = (exit_qualification & 7) + 1;
-	skip_emulated_instruction(vcpu);
 
-	return kvm_fast_pio_out(vcpu, size, port);
+	if (!string && !in) {
+		skip_emulated_instruction(vcpu);
+		return kvm_fast_pio_out(vcpu, size, port);
+	} else if (string && in && rep && cpu_has_ins_outs_inst_info) {
+		u32 inst_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+		int r = kvm_fast_string_pio_in(vcpu, size, port,
+				(inst_info >> 7) & 7);
+		if (r != EMULATE_FAIL)
+			return r == EMULATE_DONE;
+	}
+
+	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static void
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ce24a8..a245c4a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4485,10 +4485,22 @@ static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
 	ctxt->mem_read.end = 0;
 }
 
+static enum x86emul_mode get_emulation_mode(struct kvm_vcpu *vcpu)
+{
+	int cs_db, cs_l;
+
+	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+
+	return (!is_protmode(vcpu))			? X86EMUL_MODE_REAL :
+		(kvm_get_rflags(vcpu) & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
+		cs_l					? X86EMUL_MODE_PROT64 :
+		cs_db					? X86EMUL_MODE_PROT32 :
+							  X86EMUL_MODE_PROT16;
+}
+
 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
-	int cs_db, cs_l;
 
 	/*
 	 * TODO: fix emulate.c to use guest_read/write_register
@@ -4498,15 +4510,10 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	 */
 	cache_all_regs(vcpu);
 
-	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
 	ctxt->eip = kvm_rip_read(vcpu);
-	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
-		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
-		     cs_l				? X86EMUL_MODE_PROT64 :
-		     cs_db				? X86EMUL_MODE_PROT32 :
-							  X86EMUL_MODE_PROT16;
+	ctxt->mode = get_emulation_mode(vcpu);
 	ctxt->guest_mode = is_guest_mode(vcpu);
 
 	init_decode_cache(ctxt, vcpu->arch.regs);
@@ -4742,6 +4749,118 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
+static int __kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+		unsigned short port, unsigned long addr,
+		int count)
+{
+	struct page *page;
+	gpa_t gpa;
+	char *kaddr;
+	int ret;
+
+	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
+
+	if (gpa == UNMAPPED_GVA ||
+			(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+		return EMULATE_FAIL;
+
+	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	if (is_error_page(page)) {
+		kvm_release_page_clean(page);
+		return EMULATE_FAIL;
+	}
+
+	kaddr = kmap_atomic(page);
+	kaddr += offset_in_page(gpa);
+
+	ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
+			kaddr, count);
+
+	kunmap_atomic(kaddr);
+	if (ret) {
+		u8 ad_bytes = vcpu->arch.fast_string_pio_ctxt.ad_bytes;
+		unsigned long reg;
+
+		reg = kvm_register_read(vcpu, VCPU_REGS_RCX);
+		kvm_register_address_increment(ad_bytes, &reg, -count);
+		kvm_register_write(vcpu, VCPU_REGS_RCX, reg);
+
+		reg = kvm_register_read(vcpu, VCPU_REGS_RDI);
+		kvm_register_address_increment(ad_bytes, &reg, count * size);
+		kvm_register_write(vcpu, VCPU_REGS_RDI, reg);
+
+		kvm_release_page_dirty(page);
+		return EMULATE_DONE;
+	}
+	kvm_release_page_clean(page);
+	return EMULATE_DO_MMIO;
+}
+
+static int complete_fast_string_pio(struct kvm_vcpu *vcpu)
+{
+	unsigned long linear_addr = vcpu->arch.fast_string_pio_ctxt.linear_addr;
+	int r;
+
+	BUG_ON(!vcpu->arch.pio.count);
+
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = __kvm_fast_string_pio_in(vcpu, vcpu->arch.pio.size,
+			vcpu->arch.pio.port, linear_addr, vcpu->arch.pio.count);
+	BUG_ON(r == EMULATE_DO_MMIO);
+	if (r == EMULATE_FAIL) /* mem slot gone while we were not looking */
+		vcpu->arch.pio.count = 0; /* drop the pio data */
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	return 1;
+}
+
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+		unsigned short port, u8 ad_bytes_idx)
+{
+	unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
+	unsigned long linear_addr = rdi + get_segment_base(vcpu, VCPU_SREG_ES);
+	unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count;
+	u8 ad_bytes;
+	int r;
+
+	if (rcx == 0) {
+		kvm_x86_ops->skip_emulated_instruction(vcpu);
+		return EMULATE_DONE;
+	}
+	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
+		return EMULATE_FAIL;
+	if (ad_bytes_idx > 2)
+		return EMULATE_FAIL;
+
+	ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx];
+
+	rdi = kvm_address_mask(ad_bytes, rdi);
+
+	count = (PAGE_SIZE - offset_in_page(rdi))/size;
+
+	if (count == 0) /* 'in' crosses page boundry */
+		return EMULATE_FAIL;
+
+	count = min(count, kvm_address_mask(ad_bytes, rcx));
+
+	r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu),
+			rdi, VCPU_SREG_ES, count, true, false, ad_bytes,
+			&linear_addr);
+
+	if (r >= 0)
+		return EMULATE_FAIL;
+
+	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
+
+	if (r != EMULATE_DO_MMIO)
+		return r;
+
+	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
+	vcpu->arch.fast_string_pio_ctxt.ad_bytes = ad_bytes;
+	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
+	return EMULATE_DO_MMIO;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
+
 static void tsc_bad(void *info)
 {
 	__this_cpu_write(cpu_tsc_khz, 0);
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-12 12:01 ` [PATCHv2 4/5] KVM: emulator: move linearize() " Gleb Natapov
@ 2012-06-24 13:12   ` Avi Kivity
  2012-06-24 13:27     ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2012-06-24 13:12 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/12/2012 03:01 PM, Gleb Natapov wrote:
> The function will be used outside of the emulator.
> 
>  /*
>   * x86_emulate_ops:
>   *
> @@ -194,6 +199,10 @@ struct x86_emulate_ops {
>  
>  	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
>  			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
> +
> +	int (*linearize)(struct x86_emulate_ctxt *ctxt,
> +			struct segmented_address addr, unsigned size,
> +			bool write, bool fetch, ulong *linear);
>  };
>  

linearize is defined in terms of the other ops; this means that if we
get a second user they will have to replicate it.

Why not make the current linearize available to users?

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-24 13:12   ` Avi Kivity
@ 2012-06-24 13:27     ` Gleb Natapov
  2012-06-24 13:39       ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-24 13:27 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Sun, Jun 24, 2012 at 04:12:05PM +0300, Avi Kivity wrote:
> On 06/12/2012 03:01 PM, Gleb Natapov wrote:
> > The function will be used outside of the emulator.
> > 
> >  /*
> >   * x86_emulate_ops:
> >   *
> > @@ -194,6 +199,10 @@ struct x86_emulate_ops {
> >  
> >  	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
> >  			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
> > +
> > +	int (*linearize)(struct x86_emulate_ctxt *ctxt,
> > +			struct segmented_address addr, unsigned size,
> > +			bool write, bool fetch, ulong *linear);
> >  };
> >  
> 
> linearize is defined in terms of the other ops; this means that if we
> get a second user they will have to replicate it.
> 
What do you mean? This patch series adds another user, so now there are two: one
inside the emulator another is outside.

> Why not make the current linearize available to users?
>
Code outside of the emulator does not call the emulator except when
emulation is actually needed. To call linearize() from the emulator.c
almost fully functional emulation ctxt will have to be set up (including
fake instruction decoding, hacky and slower).  To not duplicate the logic
I moved linearize() to generic code and made it available to emulator
via callback. It actually saves a couple of callback invocations when
emulator calls linearize() IIRC.

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-24 13:27     ` Gleb Natapov
@ 2012-06-24 13:39       ` Avi Kivity
  2012-06-24 14:27         ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2012-06-24 13:39 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/24/2012 04:27 PM, Gleb Natapov wrote:
> On Sun, Jun 24, 2012 at 04:12:05PM +0300, Avi Kivity wrote:
>> On 06/12/2012 03:01 PM, Gleb Natapov wrote:
>> > The function will be used outside of the emulator.
>> > 
>> >  /*
>> >   * x86_emulate_ops:
>> >   *
>> > @@ -194,6 +199,10 @@ struct x86_emulate_ops {
>> >  
>> >  	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
>> >  			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
>> > +
>> > +	int (*linearize)(struct x86_emulate_ctxt *ctxt,
>> > +			struct segmented_address addr, unsigned size,
>> > +			bool write, bool fetch, ulong *linear);
>> >  };
>> >  
>> 
>> linearize is defined in terms of the other ops; this means that if we
>> get a second user they will have to replicate it.
>> 
> What do you mean? This patch series adds another user, so now there are two: one
> inside the emulator another is outside.

I meant like task switching or real-mode interrupt emulation.

> 
>> Why not make the current linearize available to users?
>>
> Code outside of the emulator does not call the emulator except when
> emulation is actually needed. To call linearize() from the emulator.c
> almost fully functional emulation ctxt will have to be set up (including
> fake instruction decoding, hacky and slower). 

ctxt->d use should be removed for the exported version and replaced by a
parameter.  The internal version can still use it (calling the exported
version after extracting the parameter).

 To not duplicate the logic
> I moved linearize() to generic code and made it available to emulator
> via callback. It actually saves a couple of callback invocations when
> emulator calls linearize() IIRC.

It's not available to other emulator users (which don't exist yet
anyway).  But having linearize() in the emulator is consistent with
placing logic in emulate.c and accessors outside.

Regarding initialization, we should eventually initialize nothing and
let the emulator bring in needed data via callbacks (including general
registers).

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-24 13:39       ` Avi Kivity
@ 2012-06-24 14:27         ` Gleb Natapov
  2012-06-25 12:57           ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-24 14:27 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Sun, Jun 24, 2012 at 04:39:22PM +0300, Avi Kivity wrote:
> On 06/24/2012 04:27 PM, Gleb Natapov wrote:
> > On Sun, Jun 24, 2012 at 04:12:05PM +0300, Avi Kivity wrote:
> >> On 06/12/2012 03:01 PM, Gleb Natapov wrote:
> >> > The function will be used outside of the emulator.
> >> > 
> >> >  /*
> >> >   * x86_emulate_ops:
> >> >   *
> >> > @@ -194,6 +199,10 @@ struct x86_emulate_ops {
> >> >  
> >> >  	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
> >> >  			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
> >> > +
> >> > +	int (*linearize)(struct x86_emulate_ctxt *ctxt,
> >> > +			struct segmented_address addr, unsigned size,
> >> > +			bool write, bool fetch, ulong *linear);
> >> >  };
> >> >  
> >> 
> >> linearize is defined in terms of the other ops; this means that if we
> >> get a second user they will have to replicate it.
> >> 
> > What do you mean? This patch series adds another user, so now there are two: one
> > inside the emulator another is outside.
> 
> I meant like task switching or real-mode interrupt emulation.
> 
You mean code outside of KVM if we ever will make emulator reusable? It will have to
have its own, much more simple version of the callback.

> > 
> >> Why not make the current linearize available to users?
> >>
> > Code outside of the emulator does not call the emulator except when
> > emulation is actually needed. To call linearize() from the emulator.c
> > almost fully functional emulation ctxt will have to be set up (including
> > fake instruction decoding, hacky and slower). 
> 
> ctxt->d use should be removed for the exported version and replaced by a
> parameter.  The internal version can still use it (calling the exported
> version after extracting the parameter).
>
IMO we should stick to the pattern we have now: calling generic code from
the emulator and not vice versa. Lets not create more spaghetti.

>  To not duplicate the logic
> > I moved linearize() to generic code and made it available to emulator
> > via callback. It actually saves a couple of callback invocations when
> > emulator calls linearize() IIRC.
> 
> It's not available to other emulator users (which don't exist yet
> anyway).  But having linearize() in the emulator is consistent with
> placing logic in emulate.c and accessors outside.
> 
It is the question of where we draw the line. For instance MMU details
are now hidden from the emulator behind a callback. One can argue that
emulator should have access to MMU directly via callbacks and
emulate memory access by itself.

> Regarding initialization, we should eventually initialize nothing and
> let the emulator bring in needed data via callbacks (including general
> registers).
> 
Some things will have to be initialized (or rather reset to initial value)
between emulator invocations. Access to registers can be done on demand,
but this is unrelated to this series optimization.

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-24 14:27         ` Gleb Natapov
@ 2012-06-25 12:57           ` Avi Kivity
  2012-06-25 13:12             ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2012-06-25 12:57 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/24/2012 05:27 PM, Gleb Natapov wrote:
> On Sun, Jun 24, 2012 at 04:39:22PM +0300, Avi Kivity wrote:
>> On 06/24/2012 04:27 PM, Gleb Natapov wrote:
>> > On Sun, Jun 24, 2012 at 04:12:05PM +0300, Avi Kivity wrote:
>> >> On 06/12/2012 03:01 PM, Gleb Natapov wrote:
>> >> > The function will be used outside of the emulator.
>> >> > 
>> >> >  /*
>> >> >   * x86_emulate_ops:
>> >> >   *
>> >> > @@ -194,6 +199,10 @@ struct x86_emulate_ops {
>> >> >  
>> >> >  	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
>> >> >  			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
>> >> > +
>> >> > +	int (*linearize)(struct x86_emulate_ctxt *ctxt,
>> >> > +			struct segmented_address addr, unsigned size,
>> >> > +			bool write, bool fetch, ulong *linear);
>> >> >  };
>> >> >  
>> >> 
>> >> linearize is defined in terms of the other ops; this means that if we
>> >> get a second user they will have to replicate it.
>> >> 
>> > What do you mean? This patch series adds another user, so now there are two: one
>> > inside the emulator another is outside.
>> 
>> I meant like task switching or real-mode interrupt emulation.
>> 
> You mean code outside of KVM if we ever will make emulator reusable? It will have to
> have its own, much more simple version of the callback.
> 
>> > 
>> >> Why not make the current linearize available to users?
>> >>
>> > Code outside of the emulator does not call the emulator except when
>> > emulation is actually needed. To call linearize() from the emulator.c
>> > almost fully functional emulation ctxt will have to be set up (including
>> > fake instruction decoding, hacky and slower). 
>> 
>> ctxt->d use should be removed for the exported version and replaced by a
>> parameter.  The internal version can still use it (calling the exported
>> version after extracting the parameter).
>>
> IMO we should stick to the pattern we have now: calling generic code from
> the emulator and not vice versa. Lets not create more spaghetti.
> 
>>  To not duplicate the logic
>> > I moved linearize() to generic code and made it available to emulator
>> > via callback. It actually saves a couple of callback invocations when
>> > emulator calls linearize() IIRC.
>> 
>> It's not available to other emulator users (which don't exist yet
>> anyway).  But having linearize() in the emulator is consistent with
>> placing logic in emulate.c and accessors outside.
>> 
> It is the question of where we draw the line. For instance MMU details
> are now hidden from the emulator behind a callback. One can argue that
> emulator should have access to MMU directly via callbacks and
> emulate memory access by itself.

Right now the all segment related operations are behind the line; the
line is linear | physical.  Having a ->linearize op will change that.

> 
>> Regarding initialization, we should eventually initialize nothing and
>> let the emulator bring in needed data via callbacks (including general
>> registers).
>> 
> Some things will have to be initialized (or rather reset to initial value)
> between emulator invocations. Access to registers can be done on demand,
> but this is unrelated to this series optimization.

Right.  But I think we can have x86_linearize() that doesn't take a
context parameter, only ops.

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 12:57           ` Avi Kivity
@ 2012-06-25 13:12             ` Gleb Natapov
  2012-06-25 13:40               ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-25 13:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Jun 25, 2012 at 03:57:42PM +0300, Avi Kivity wrote:
> On 06/24/2012 05:27 PM, Gleb Natapov wrote:
> > On Sun, Jun 24, 2012 at 04:39:22PM +0300, Avi Kivity wrote:
> >> On 06/24/2012 04:27 PM, Gleb Natapov wrote:
> >> > On Sun, Jun 24, 2012 at 04:12:05PM +0300, Avi Kivity wrote:
> >> >> On 06/12/2012 03:01 PM, Gleb Natapov wrote:
> >> >> > The function will be used outside of the emulator.
> >> >> > 
> >> >> >  /*
> >> >> >   * x86_emulate_ops:
> >> >> >   *
> >> >> > @@ -194,6 +199,10 @@ struct x86_emulate_ops {
> >> >> >  
> >> >> >  	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
> >> >> >  			 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
> >> >> > +
> >> >> > +	int (*linearize)(struct x86_emulate_ctxt *ctxt,
> >> >> > +			struct segmented_address addr, unsigned size,
> >> >> > +			bool write, bool fetch, ulong *linear);
> >> >> >  };
> >> >> >  
> >> >> 
> >> >> linearize is defined in terms of the other ops; this means that if we
> >> >> get a second user they will have to replicate it.
> >> >> 
> >> > What do you mean? This patch series adds another user, so now there are two: one
> >> > inside the emulator another is outside.
> >> 
> >> I meant like task switching or real-mode interrupt emulation.
> >> 
> > You mean code outside of KVM if we ever will make emulator reusable? It will have to
> > have its own, much more simple version of the callback.
> > 
> >> > 
> >> >> Why not make the current linearize available to users?
> >> >>
> >> > Code outside of the emulator does not call the emulator except when
> >> > emulation is actually needed. To call linearize() from the emulator.c
> >> > almost fully functional emulation ctxt will have to be set up (including
> >> > fake instruction decoding, hacky and slower). 
> >> 
> >> ctxt->d use should be removed for the exported version and replaced by a
> >> parameter.  The internal version can still use it (calling the exported
> >> version after extracting the parameter).
> >>
> > IMO we should stick to the pattern we have now: calling generic code from
> > the emulator and not vice versa. Lets not create more spaghetti.
> > 
> >>  To not duplicate the logic
> >> > I moved linearize() to generic code and made it available to emulator
> >> > via callback. It actually saves a couple of callback invocations when
> >> > emulator calls linearize() IIRC.
> >> 
> >> It's not available to other emulator users (which don't exist yet
> >> anyway).  But having linearize() in the emulator is consistent with
> >> placing logic in emulate.c and accessors outside.
> >> 
> > It is the question of where we draw the line. For instance MMU details
> > are now hidden from the emulator behind a callback. One can argue that
> > emulator should have access to MMU directly via callbacks and
> > emulate memory access by itself.
> 
> Right now the all segment related operations are behind the line; the
> line is linear | physical.  Having a ->linearize op will change that.
> 
> > 
> >> Regarding initialization, we should eventually initialize nothing and
> >> let the emulator bring in needed data via callbacks (including general
> >> registers).
> >> 
> > Some things will have to be initialized (or rather reset to initial value)
> > between emulator invocations. Access to registers can be done on demand,
> > but this is unrelated to this series optimization.
> 
> Right.  But I think we can have x86_linearize() that doesn't take a
> context parameter, only ops.
> 
All ops take context parameter though.

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 13:12             ` Gleb Natapov
@ 2012-06-25 13:40               ` Avi Kivity
  2012-06-25 14:17                 ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2012-06-25 13:40 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/25/2012 04:12 PM, Gleb Natapov wrote:

>> Right.  But I think we can have x86_linearize() that doesn't take a
>> context parameter, only ops.
>> 
> All ops take context parameter though.
> 

context is meaningful for:
- saving state between executions (decode/execute/execute)
- passing state that is not provided via callbacks (regs/mode/flags)
- returning results

Only the second is relevant, and we're trying to get rid of that too.

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 13:40               ` Avi Kivity
@ 2012-06-25 14:17                 ` Gleb Natapov
  2012-06-25 14:32                   ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-25 14:17 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Jun 25, 2012 at 04:40:35PM +0300, Avi Kivity wrote:
> On 06/25/2012 04:12 PM, Gleb Natapov wrote:
> 
> >> Right.  But I think we can have x86_linearize() that doesn't take a
> >> context parameter, only ops.
> >> 
> > All ops take context parameter though.
> > 
> 
> context is meaningful for:
> - saving state between executions (decode/execute/execute)
> - passing state that is not provided via callbacks (regs/mode/flags)
> - returning results
> 
> Only the second is relevant, and we're trying to get rid of that too.
> 
Callbacks were passed pointer to vcpu, but they were changed to get ctxt
to better encapsulate emulator.c from rest of the KVM. Are you suggesting
this was a mistake and we need to rework callbacks to receive pointer
to vcpu again? I hope not :)

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 14:17                 ` Gleb Natapov
@ 2012-06-25 14:32                   ` Avi Kivity
  2012-06-25 14:55                     ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2012-06-25 14:32 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/25/2012 05:17 PM, Gleb Natapov wrote:
> On Mon, Jun 25, 2012 at 04:40:35PM +0300, Avi Kivity wrote:
>> On 06/25/2012 04:12 PM, Gleb Natapov wrote:
>> 
>> >> Right.  But I think we can have x86_linearize() that doesn't take a
>> >> context parameter, only ops.
>> >> 
>> > All ops take context parameter though.
>> > 
>> 
>> context is meaningful for:
>> - saving state between executions (decode/execute/execute)
>> - passing state that is not provided via callbacks (regs/mode/flags)
>> - returning results
>> 
>> Only the second is relevant, and we're trying to get rid of that too.
>> 
> Callbacks were passed pointer to vcpu, but they were changed to get ctxt
> to better encapsulate emulator.c from rest of the KVM. Are you suggesting
> this was a mistake and we need to rework callbacks to receive pointer
> to vcpu again? I hope not :)

Ouch.  I guess we have to pass the context, but not initialize any of it
except ops.  Later we can extend x86_decode_insn() and the other
functions to follow the same rule.

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 14:32                   ` Avi Kivity
@ 2012-06-25 14:55                     ` Gleb Natapov
  2012-06-25 15:03                       ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-25 14:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Jun 25, 2012 at 05:32:31PM +0300, Avi Kivity wrote:
> On 06/25/2012 05:17 PM, Gleb Natapov wrote:
> > On Mon, Jun 25, 2012 at 04:40:35PM +0300, Avi Kivity wrote:
> >> On 06/25/2012 04:12 PM, Gleb Natapov wrote:
> >> 
> >> >> Right.  But I think we can have x86_linearize() that doesn't take a
> >> >> context parameter, only ops.
> >> >> 
> >> > All ops take context parameter though.
> >> > 
> >> 
> >> context is meaningful for:
> >> - saving state between executions (decode/execute/execute)
> >> - passing state that is not provided via callbacks (regs/mode/flags)
> >> - returning results
> >> 
> >> Only the second is relevant, and we're trying to get rid of that too.
> >> 
> > Callbacks were passed pointer to vcpu, but they were changed to get ctxt
> > to better encapsulate emulator.c from rest of the KVM. Are you suggesting
> > this was a mistake and we need to rework callbacks to receive pointer
> > to vcpu again? I hope not :)
> 
> Ouch.  I guess we have to pass the context, but not initialize any of it
> except ops.
That's hacky and error prone. We need to audit that linearize() and all
callbacks/functions it uses do not rely on un-initialized state, which
is doable now, but who will remember to check that in the future, while
changing seemingly unrelated code, which, by a coincidence, called during
linearize()? Instant security vulnerability. For security (if not
sanity) sake we should really make sure that ctxt is initialized while
we are in emulator.c and make as many checks for it as possible.

>              Later we can extend x86_decode_insn() and the other
> functions to follow the same rule.
> 
What rule? We cannot not initialize a context. You can reduce things
that should be initialized to minimum (getting GP registers on demand,
etc), but still some initialization is needed since ctxt holds emulation
state and it needs to be reset before each emulation.

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 14:55                     ` Gleb Natapov
@ 2012-06-25 15:03                       ` Avi Kivity
  2012-06-25 15:35                         ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2012-06-25 15:03 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/25/2012 05:55 PM, Gleb Natapov wrote:
> On Mon, Jun 25, 2012 at 05:32:31PM +0300, Avi Kivity wrote:
>> On 06/25/2012 05:17 PM, Gleb Natapov wrote:
>> > On Mon, Jun 25, 2012 at 04:40:35PM +0300, Avi Kivity wrote:
>> >> On 06/25/2012 04:12 PM, Gleb Natapov wrote:
>> >> 
>> >> >> Right.  But I think we can have x86_linearize() that doesn't take a
>> >> >> context parameter, only ops.
>> >> >> 
>> >> > All ops take context parameter though.
>> >> > 
>> >> 
>> >> context is meaningful for:
>> >> - saving state between executions (decode/execute/execute)
>> >> - passing state that is not provided via callbacks (regs/mode/flags)
>> >> - returning results
>> >> 
>> >> Only the second is relevant, and we're trying to get rid of that too.
>> >> 
>> > Callbacks were passed pointer to vcpu, but they were changed to get ctxt
>> > to better encapsulate emulator.c from rest of the KVM. Are you suggesting
>> > this was a mistake and we need to rework callbacks to receive pointer
>> > to vcpu again? I hope not :)
>> 
>> Ouch.  I guess we have to pass the context, but not initialize any of it
>> except ops.
> That's hacky and error prone. We need to audit that linearize() and all
> callbacks/functions it uses do not rely on un-initialized state, which
> is doable now, but who will remember to check that in the future, while
> changing seemingly unrelated code, which, by a coincidence, called during
> linearize()? Instant security vulnerability. For security (if not
> sanity) sake we should really make sure that ctxt is initialized while
> we are in emulator.c and make as many checks for it as possible.

Agree.  Though the security issue is limited; the structure won't be
uninitialized, it would retain values from the previous call.  So it's
limited to intra-guest vulnerabilities.

> 
>>              Later we can extend x86_decode_insn() and the other
>> functions to follow the same rule.
>> 
> What rule? We cannot not initialize a context. You can reduce things
> that should be initialized to minimum (getting GP registers on demand,
> etc), but still some initialization is needed since ctxt holds emulation
> state and it needs to be reset before each emulation.

An alternative is to use two contexts, the base context only holds ops
and is the parameter to all the callbacks on the non-state APIs, the
derived context holds the state:

struct x86_emulation_ctxt {
    struct x86_ops *ops;
    /* state that always needs to be initialized, preferably none */
};

struct x86_insn_ctxt {
    struct x86_emulation_ctxt em;
    /* instruction state */
}

and so we have a compile-time split between users of the state and
non-users.

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 15:03                       ` Avi Kivity
@ 2012-06-25 15:35                         ` Gleb Natapov
  2012-06-25 15:50                           ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-25 15:35 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Jun 25, 2012 at 06:03:19PM +0300, Avi Kivity wrote:
> On 06/25/2012 05:55 PM, Gleb Natapov wrote:
> > On Mon, Jun 25, 2012 at 05:32:31PM +0300, Avi Kivity wrote:
> >> On 06/25/2012 05:17 PM, Gleb Natapov wrote:
> >> > On Mon, Jun 25, 2012 at 04:40:35PM +0300, Avi Kivity wrote:
> >> >> On 06/25/2012 04:12 PM, Gleb Natapov wrote:
> >> >> 
> >> >> >> Right.  But I think we can have x86_linearize() that doesn't take a
> >> >> >> context parameter, only ops.
> >> >> >> 
> >> >> > All ops take context parameter though.
> >> >> > 
> >> >> 
> >> >> context is meaningful for:
> >> >> - saving state between executions (decode/execute/execute)
> >> >> - passing state that is not provided via callbacks (regs/mode/flags)
> >> >> - returning results
> >> >> 
> >> >> Only the second is relevant, and we're trying to get rid of that too.
> >> >> 
> >> > Callbacks were passed pointer to vcpu, but they were changed to get ctxt
> >> > to better encapsulate emulator.c from rest of the KVM. Are you suggesting
> >> > this was a mistake and we need to rework callbacks to receive pointer
> >> > to vcpu again? I hope not :)
> >> 
> >> Ouch.  I guess we have to pass the context, but not initialize any of it
> >> except ops.
> > That's hacky and error prone. We need to audit that linearize() and all
> > callbacks/functions it uses do not rely on un-initialized state, which
> > is doable now, but who will remember to check that in the future, while
> > changing seemingly unrelated code, which, by a coincidence, called during
> > linearize()? Instant security vulnerability. For security (if not
> > sanity) sake we should really make sure that ctxt is initialized while
> > we are in emulator.c and make as many checks for it as possible.
> 
> Agree.  Though the security issue is limited; the structure won't be
> uninitialized, it would retain values from the previous call.  So it's
> limited to intra-guest vulnerabilities.
> 
Yes, that's the kind I mean, not host crash. Intra-guest vulnerabilities
should not be taken lightly. From guest POV they are like buggy CPUs
that allows privilege escalation.

> > 
> >>              Later we can extend x86_decode_insn() and the other
> >> functions to follow the same rule.
> >> 
> > What rule? We cannot not initialize a context. You can reduce things
> > that should be initialized to minimum (getting GP registers on demand,
> > etc), but still some initialization is needed since ctxt holds emulation
> > state and it needs to be reset before each emulation.
> 
> An alternative is to use two contexts, the base context only holds ops
> and is the parameter to all the callbacks on the non-state APIs, the
> derived context holds the state:
> 
> struct x86_emulation_ctxt {
>     struct x86_ops *ops;
>     /* state that always needs to be initialized, preferably none */
> };
> 
> struct x86_insn_ctxt {
>     struct x86_emulation_ctxt em;
>     /* instruction state */
> }
> 
> and so we have a compile-time split between users of the state and
> non-users.
> 
I do not understand how you will divide current ctxt structure between
those two.

Where will you put those for instance: interruptibility, have_exception,
perm_ok, only_vendor_specific_insn and how can they not be initialized
before each instruction emulation?

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 15:35                         ` Gleb Natapov
@ 2012-06-25 15:50                           ` Avi Kivity
  2012-06-26  8:30                             ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2012-06-25 15:50 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/25/2012 06:35 PM, Gleb Natapov wrote:
>> 
>> Agree.  Though the security issue is limited; the structure won't be
>> uninitialized, it would retain values from the previous call.  So it's
>> limited to intra-guest vulnerabilities.
>> 
> Yes, that's the kind I mean, not host crash. Intra-guest vulnerabilities
> should not be taken lightly. From guest POV they are like buggy CPUs
> that allows privilege escalation.

It's a smaller disaster; I didn't mean to minimize those issues.

> 
>> > 
>> >>              Later we can extend x86_decode_insn() and the other
>> >> functions to follow the same rule.
>> >> 
>> > What rule? We cannot not initialize a context. You can reduce things
>> > that should be initialized to minimum (getting GP registers on demand,
>> > etc), but still some initialization is needed since ctxt holds emulation
>> > state and it needs to be reset before each emulation.
>> 
>> An alternative is to use two contexts, the base context only holds ops
>> and is the parameter to all the callbacks on the non-state APIs, the
>> derived context holds the state:
>> 
>> struct x86_emulation_ctxt {
>>     struct x86_ops *ops;
>>     /* state that always needs to be initialized, preferably none */
>> };
>> 
>> struct x86_insn_ctxt {
>>     struct x86_emulation_ctxt em;
>>     /* instruction state */
>> }
>> 
>> and so we have a compile-time split between users of the state and
>> non-users.
>> 
> I do not understand how you will divide current ctxt structure between
> those two.
> 
> Where will you put those for instance: interruptibility, have_exception,
> perm_ok, only_vendor_specific_insn and how can they not be initialized
> before each instruction emulation?

x86_emulate_ops::get_interruptibility()
x86_emulate_ops::set_interruptibility()
x86_emulate_ops::exception()

x86_decode_insn(struct x86_insn_ctxt *ctxt, unsigned flags)
{
    ctxt->flags = flags;
    ctxt->perm_ok = false;
}

In short, instruction emulation state is only seen by instruction
emulation functions, the others don't get to see it.

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-25 15:50                           ` Avi Kivity
@ 2012-06-26  8:30                             ` Gleb Natapov
  2012-06-26  9:19                               ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Gleb Natapov @ 2012-06-26  8:30 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Jun 25, 2012 at 06:50:06PM +0300, Avi Kivity wrote:
> >> >>              Later we can extend x86_decode_insn() and the other
> >> >> functions to follow the same rule.
> >> >> 
> >> > What rule? We cannot not initialize a context. You can reduce things
> >> > that should be initialized to minimum (getting GP registers on demand,
> >> > etc), but still some initialization is needed since ctxt holds emulation
> >> > state and it needs to be reset before each emulation.
> >> 
> >> An alternative is to use two contexts, the base context only holds ops
> >> and is the parameter to all the callbacks on the non-state APIs, the
> >> derived context holds the state:
> >> 
> >> struct x86_emulation_ctxt {
> >>     struct x86_ops *ops;
> >>     /* state that always needs to be initialized, preferably none */
> >> };
> >> 
> >> struct x86_insn_ctxt {
> >>     struct x86_emulation_ctxt em;
> >>     /* instruction state */
> >> }
> >> 
> >> and so we have a compile-time split between users of the state and
> >> non-users.
> >> 
> > I do not understand how you will divide current ctxt structure between
> > those two.
> > 
> > Where will you put those for instance: interruptibility, have_exception,
> > perm_ok, only_vendor_specific_insn and how can they not be initialized
> > before each instruction emulation?
> 
> x86_emulate_ops::get_interruptibility()
> x86_emulate_ops::set_interruptibility()
> x86_emulate_ops::exception()
> 
They do not remove the need for initialization before instruction
execution, they just move things that need to be initialized somewhere
else (to kvm_arch_vcpu likely).

> x86_decode_insn(struct x86_insn_ctxt *ctxt, unsigned flags)
> {
>     ctxt->flags = flags;
>     ctxt->perm_ok = false;
> }
> 
> In short, instruction emulation state is only seen by instruction
> emulation functions, the others don't get to see it.
> 
So you want to divide emulator.c to two types of function: those without
side effect, that do some kind of calculations on vcpu state according
to weird x86 rules, and those that change vcpu state and write it back
eventually. I do not see the justification for that complication really.
emulator.c is complicated enough already and the line between the two may be
blurred.

If you dislike linearize() callback so much I can make
kvm_linearize_address() do calculations based on its parameters only.
It is almost there, only cpl and seg base/desc are missing from
parameter list. I can put it into header and x86.c/emulator.c will both
be able to use it.

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 4/5] KVM: emulator: move linearize() out of emulator code.
  2012-06-26  8:30                             ` Gleb Natapov
@ 2012-06-26  9:19                               ` Avi Kivity
  0 siblings, 0 replies; 25+ messages in thread
From: Avi Kivity @ 2012-06-26  9:19 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/26/2012 11:30 AM, Gleb Natapov wrote:
>> > 
>> > Where will you put those for instance: interruptibility, have_exception,
>> > perm_ok, only_vendor_specific_insn and how can they not be initialized
>> > before each instruction emulation?
>> 
>> x86_emulate_ops::get_interruptibility()
>> x86_emulate_ops::set_interruptibility()
>> x86_emulate_ops::exception()
>> 
> They do not remove the need for initialization before instruction
> execution, they just move things that need to be initialized somewhere
> else (to kvm_arch_vcpu likely).
> 
>> x86_decode_insn(struct x86_insn_ctxt *ctxt, unsigned flags)
>> {
>>     ctxt->flags = flags;
>>     ctxt->perm_ok = false;
>> }
>> 
>> In short, instruction emulation state is only seen by instruction
>> emulation functions, the others don't get to see it.
>> 
> So you want to divide emulator.c to two types of function: those without
> side effect, that do some kind of calculations on vcpu state according
> to weird x86 rules, and those that change vcpu state and write it back
> eventually. I do not see the justification for that complication really.
> emulator.c is complicated enough already and the line between the two may be
> blurred.

Really, the only issue is that the read/write callbacks sometimes cannot
return a result.  Otherwise the entire thing would be stateless.

> If you dislike linearize() callback so much I can make
> kvm_linearize_address() to do calculation base on its parameters only.
> It is almost there, only cpl and seg base/desc are missing from
> parameter list. I can put it into header and x86.c/emulator.c will both
> be able to use it.

And all the stack mask and stuff?  Yuck.


-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 1/5] Provide userspace IO exit completion callback.
  2012-06-12 12:01 ` [PATCHv2 1/5] Provide userspace IO exit completion callback Gleb Natapov
@ 2012-06-29  0:51   ` Marcelo Tosatti
  2012-07-01  8:15     ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Marcelo Tosatti @ 2012-06-29  0:51 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, avi

On Tue, Jun 12, 2012 at 03:01:23PM +0300, Gleb Natapov wrote:
> Current code assumes that IO exit was due to instruction emulation
> and hands execution back to the emulator directly. This patch adds a new
> userspace IO exit completion callback that can be set by any other code
> that caused IO exit to userspace.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/x86.c              |   92 +++++++++++++++++++++++----------------
>  2 files changed, 56 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index db7c1f2..1a1bba6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -406,6 +406,7 @@ struct kvm_vcpu_arch {
>  	struct x86_emulate_ctxt emulate_ctxt;
>  	bool emulate_regs_need_sync_to_vcpu;
>  	bool emulate_regs_need_sync_from_vcpu;
> +	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
>  
>  	gpa_t time;
>  	struct pvclock_vcpu_time_info hv_clock;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a01a424..6fa0e21 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4547,6 +4547,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
>  	return true;
>  }
>  
> +static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
> +static int complete_emulated_pio(struct kvm_vcpu *vcpu);
> +
>  int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>  			    unsigned long cr2,
>  			    int emulation_type,
> @@ -4617,13 +4620,16 @@ restart:
>  	} else if (vcpu->arch.pio.count) {
>  		if (!vcpu->arch.pio.in)
>  			vcpu->arch.pio.count = 0;
> -		else
> +		else {
>  			writeback = false;
> +			vcpu->arch.complete_userspace_io = complete_emulated_pio;
> +		}
>  		r = EMULATE_DO_MMIO;
>  	} else if (vcpu->mmio_needed) {
>  		if (!vcpu->mmio_is_write)
>  			writeback = false;
>  		r = EMULATE_DO_MMIO;
> +		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
>  	} else if (r == EMULATION_RESTART)
>  		goto restart;
>  	else
> @@ -5474,6 +5480,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
>  	return r;
>  }
>  
> +static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
> +{
> +	int r;
> +	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> +	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
> +	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> +	if (r != EMULATE_DONE)
> +		return 0;
> +	return 1;
> +}
> +
> +static int complete_emulated_pio(struct kvm_vcpu *vcpu)
> +{
> +	BUG_ON(!vcpu->arch.pio.count);
> +
> +	return complete_emulated_io(vcpu);
> +}
> +
>  /*
>   * Implements the following, as a state machine:
>   *
> @@ -5490,47 +5514,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
>   *      copy data
>   *      exit
>   */
> -static int complete_mmio(struct kvm_vcpu *vcpu)
> +static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_run *run = vcpu->run;
>  	struct kvm_mmio_fragment *frag;
> -	int r;
>  
> -	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
> -		return 1;
> +	BUG_ON(!vcpu->mmio_needed);
>  
> -	if (vcpu->mmio_needed) {
> -		/* Complete previous fragment */
> -		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
> -		if (!vcpu->mmio_is_write)
> -			memcpy(frag->data, run->mmio.data, frag->len);
> -		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
> -			vcpu->mmio_needed = 0;
> -			if (vcpu->mmio_is_write)
> -				return 1;
> -			vcpu->mmio_read_completed = 1;
> -			goto done;
> -		}
> -		/* Initiate next fragment */
> -		++frag;
> -		run->exit_reason = KVM_EXIT_MMIO;
> -		run->mmio.phys_addr = frag->gpa;
> +	/* Complete previous fragment */
> +	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
> +	if (!vcpu->mmio_is_write)
> +		memcpy(frag->data, run->mmio.data, frag->len);
> +	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
> +		vcpu->mmio_needed = 0;
>  		if (vcpu->mmio_is_write)
> -			memcpy(run->mmio.data, frag->data, frag->len);
> -		run->mmio.len = frag->len;
> -		run->mmio.is_write = vcpu->mmio_is_write;
> -		return 0;
> -
> -	}
> -done:
> -	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> -	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
> -	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> -	if (r != EMULATE_DONE)
> -		return 0;
> -	return 1;
> +			return 1;
> +		vcpu->mmio_read_completed = 1;
> +		return complete_emulated_io(vcpu);
> +	}
> +	/* Initiate next fragment */
> +	++frag;
> +	run->exit_reason = KVM_EXIT_MMIO;
> +	run->mmio.phys_addr = frag->gpa;
> +	if (vcpu->mmio_is_write)
> +		memcpy(run->mmio.data, frag->data, frag->len);
> +	run->mmio.len = frag->len;
> +	run->mmio.is_write = vcpu->mmio_is_write;
> +	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
> +	return 0;
>  }
>  
> +
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  {
>  	int r;
> @@ -5557,9 +5571,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  		}
>  	}
>  
> -	r = complete_mmio(vcpu);
> -	if (r <= 0)
> -		goto out;
> +	if (unlikely(vcpu->arch.complete_userspace_io)) {
> +		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
> +		vcpu->arch.complete_userspace_io = NULL;
> +		r = cui(vcpu);
> +		if (r <= 0)

< 0 when?


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 5/5] KVM: Provide fast path for "rep ins" emulation if possible.
  2012-06-12 12:01 ` [PATCHv2 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
@ 2012-06-29 22:26   ` Marcelo Tosatti
  2012-07-01 11:24     ` Gleb Natapov
  0 siblings, 1 reply; 25+ messages in thread
From: Marcelo Tosatti @ 2012-06-29 22:26 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, avi

On Tue, Jun 12, 2012 at 03:01:27PM +0300, Gleb Natapov wrote:
> "rep ins" emulation is going through emulator now. This is slow because
> emulator knows how to write back only one datum at a time. This patch
> provides fast path for the instruction in certain conditions. The
> conditions are: DF flag is not set, destination memory is RAM and single
> datum does not cross page boundary. If fast path code fails it falls
> back to emulation.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    6 ++
>  arch/x86/kvm/svm.c              |   20 +++++--
>  arch/x86/kvm/vmx.c              |   25 +++++--
>  arch/x86/kvm/x86.c              |  133 ++++++++++++++++++++++++++++++++++++--
>  4 files changed, 165 insertions(+), 19 deletions(-)
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 7a41878..f3e7bb3 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
>  {
>  	struct kvm_vcpu *vcpu = &svm->vcpu;
>  	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
> -	int size, in, string;
> +	int size, in, string, rep;
>  	unsigned port;
>  
>  	++svm->vcpu.stat.io_exits;
>  	string = (io_info & SVM_IOIO_STR_MASK) != 0;
> +	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
>  	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
> -	if (string || in)
> -		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>  
>  	port = io_info >> 16;
>  	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
>  	svm->next_rip = svm->vmcb->control.exit_info_2;
> -	skip_emulated_instruction(&svm->vcpu);
>  
> -	return kvm_fast_pio_out(vcpu, size, port);
> +	if (!string && !in) {
> +		skip_emulated_instruction(&svm->vcpu);
> +		return kvm_fast_pio_out(vcpu, size, port);
> +	} else if (string && in && rep) {

Is there a reason to restrict optimization to rep ? That is, 
it should be easy to extend to normal in?

> +		kvm_x86_ops->skip_emulated_instruction(vcpu);
> +		return EMULATE_DONE;
> +	}
> +	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
> +		return EMULATE_FAIL;
> +	if (ad_bytes_idx > 2)
> +		return EMULATE_FAIL;
> +
> +	ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx];
> +
> +	rdi = kvm_address_mask(ad_bytes, rdi);
> +
> +	count = (PAGE_SIZE - offset_in_page(rdi))/size;
> +
> +	if (count == 0) /* 'in' crosses page boundary */
> +		return EMULATE_FAIL;
> +
> +	count = min(count, kvm_address_mask(ad_bytes, rcx));
> + 
> +	r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu),
> +			rdi, VCPU_SREG_ES, count, true, false, ad_bytes,
> +			&linear_addr);

kvm_linearize_address expects size parameter in bytes?


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 1/5] Provide userspace IO exit completion callback.
  2012-06-29  0:51   ` Marcelo Tosatti
@ 2012-07-01  8:15     ` Gleb Natapov
  0 siblings, 0 replies; 25+ messages in thread
From: Gleb Natapov @ 2012-07-01  8:15 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, avi

On Thu, Jun 28, 2012 at 09:51:40PM -0300, Marcelo Tosatti wrote:
> On Tue, Jun 12, 2012 at 03:01:23PM +0300, Gleb Natapov wrote:
> > Current code assumes that IO exit was due to instruction emulation
> > and hands execution back to the emulator directly. This patch adds a new
> > userspace IO exit completion callback that can be set by any other code
> > that caused IO exit to userspace.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |    1 +
> >  arch/x86/kvm/x86.c              |   92 +++++++++++++++++++++++----------------
> >  2 files changed, 56 insertions(+), 37 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index db7c1f2..1a1bba6 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -406,6 +406,7 @@ struct kvm_vcpu_arch {
> >  	struct x86_emulate_ctxt emulate_ctxt;
> >  	bool emulate_regs_need_sync_to_vcpu;
> >  	bool emulate_regs_need_sync_from_vcpu;
> > +	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
> >  
> >  	gpa_t time;
> >  	struct pvclock_vcpu_time_info hv_clock;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index a01a424..6fa0e21 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -4547,6 +4547,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
> >  	return true;
> >  }
> >  
> > +static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
> > +static int complete_emulated_pio(struct kvm_vcpu *vcpu);
> > +
> >  int x86_emulate_instruction(struct kvm_vcpu *vcpu,
> >  			    unsigned long cr2,
> >  			    int emulation_type,
> > @@ -4617,13 +4620,16 @@ restart:
> >  	} else if (vcpu->arch.pio.count) {
> >  		if (!vcpu->arch.pio.in)
> >  			vcpu->arch.pio.count = 0;
> > -		else
> > +		else {
> >  			writeback = false;
> > +			vcpu->arch.complete_userspace_io = complete_emulated_pio;
> > +		}
> >  		r = EMULATE_DO_MMIO;
> >  	} else if (vcpu->mmio_needed) {
> >  		if (!vcpu->mmio_is_write)
> >  			writeback = false;
> >  		r = EMULATE_DO_MMIO;
> > +		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
> >  	} else if (r == EMULATION_RESTART)
> >  		goto restart;
> >  	else
> > @@ -5474,6 +5480,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> >  	return r;
> >  }
> >  
> > +static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
> > +{
> > +	int r;
> > +	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> > +	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
> > +	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> > +	if (r != EMULATE_DONE)
> > +		return 0;
> > +	return 1;
> > +}
> > +
> > +static int complete_emulated_pio(struct kvm_vcpu *vcpu)
> > +{
> > +	BUG_ON(!vcpu->arch.pio.count);
> > +
> > +	return complete_emulated_io(vcpu);
> > +}
> > +
> >  /*
> >   * Implements the following, as a state machine:
> >   *
> > @@ -5490,47 +5514,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> >   *      copy data
> >   *      exit
> >   */
> > -static int complete_mmio(struct kvm_vcpu *vcpu)
> > +static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
> >  {
> >  	struct kvm_run *run = vcpu->run;
> >  	struct kvm_mmio_fragment *frag;
> > -	int r;
> >  
> > -	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
> > -		return 1;
> > +	BUG_ON(!vcpu->mmio_needed);
> >  
> > -	if (vcpu->mmio_needed) {
> > -		/* Complete previous fragment */
> > -		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
> > -		if (!vcpu->mmio_is_write)
> > -			memcpy(frag->data, run->mmio.data, frag->len);
> > -		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
> > -			vcpu->mmio_needed = 0;
> > -			if (vcpu->mmio_is_write)
> > -				return 1;
> > -			vcpu->mmio_read_completed = 1;
> > -			goto done;
> > -		}
> > -		/* Initiate next fragment */
> > -		++frag;
> > -		run->exit_reason = KVM_EXIT_MMIO;
> > -		run->mmio.phys_addr = frag->gpa;
> > +	/* Complete previous fragment */
> > +	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
> > +	if (!vcpu->mmio_is_write)
> > +		memcpy(frag->data, run->mmio.data, frag->len);
> > +	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
> > +		vcpu->mmio_needed = 0;
> >  		if (vcpu->mmio_is_write)
> > -			memcpy(run->mmio.data, frag->data, frag->len);
> > -		run->mmio.len = frag->len;
> > -		run->mmio.is_write = vcpu->mmio_is_write;
> > -		return 0;
> > -
> > -	}
> > -done:
> > -	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> > -	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
> > -	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> > -	if (r != EMULATE_DONE)
> > -		return 0;
> > -	return 1;
> > +			return 1;
> > +		vcpu->mmio_read_completed = 1;
> > +		return complete_emulated_io(vcpu);
> > +	}
> > +	/* Initiate next fragment */
> > +	++frag;
> > +	run->exit_reason = KVM_EXIT_MMIO;
> > +	run->mmio.phys_addr = frag->gpa;
> > +	if (vcpu->mmio_is_write)
> > +		memcpy(run->mmio.data, frag->data, frag->len);
> > +	run->mmio.len = frag->len;
> > +	run->mmio.is_write = vcpu->mmio_is_write;
> > +	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
> > +	return 0;
> >  }
> >  
> > +
> >  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  {
> >  	int r;
> > @@ -5557,9 +5571,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  		}
> >  	}
> >  
> > -	r = complete_mmio(vcpu);
> > -	if (r <= 0)
> > -		goto out;
> > +	if (unlikely(vcpu->arch.complete_userspace_io)) {
> > +		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
> > +		vcpu->arch.complete_userspace_io = NULL;
> > +		r = cui(vcpu);
> > +		if (r <= 0)
> 
> < 0 when?
When a callback returns an error. Existing callbacks do not, but in the future they
can.

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2 5/5] KVM: Provide fast path for "rep ins" emulation if possible.
  2012-06-29 22:26   ` Marcelo Tosatti
@ 2012-07-01 11:24     ` Gleb Natapov
  0 siblings, 0 replies; 25+ messages in thread
From: Gleb Natapov @ 2012-07-01 11:24 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, avi

On Fri, Jun 29, 2012 at 07:26:38PM -0300, Marcelo Tosatti wrote:
> On Tue, Jun 12, 2012 at 03:01:27PM +0300, Gleb Natapov wrote:
> > "rep ins" emulation is going through emulator now. This is slow because
> > emulator knows how to write back only one datum at a time. This patch
> > provides fast path for the instruction in certain conditions. The
> > conditions are: DF flag is not set, destination memory is RAM and single
> > datum does not cross page boundary. If fast path code fails it falls
> > back to emulation.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |    6 ++
> >  arch/x86/kvm/svm.c              |   20 +++++--
> >  arch/x86/kvm/vmx.c              |   25 +++++--
> >  arch/x86/kvm/x86.c              |  133 ++++++++++++++++++++++++++++++++++++--
> >  4 files changed, 165 insertions(+), 19 deletions(-)
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 7a41878..f3e7bb3 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
> >  {
> >  	struct kvm_vcpu *vcpu = &svm->vcpu;
> >  	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
> > -	int size, in, string;
> > +	int size, in, string, rep;
> >  	unsigned port;
> >  
> >  	++svm->vcpu.stat.io_exits;
> >  	string = (io_info & SVM_IOIO_STR_MASK) != 0;
> > +	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
> >  	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
> > -	if (string || in)
> > -		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
> >  
> >  	port = io_info >> 16;
> >  	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
> >  	svm->next_rip = svm->vmcb->control.exit_info_2;
> > -	skip_emulated_instruction(&svm->vcpu);
> >  
> > -	return kvm_fast_pio_out(vcpu, size, port);
> > +	if (!string && !in) {
> > +		skip_emulated_instruction(&svm->vcpu);
> > +		return kvm_fast_pio_out(vcpu, size, port);
> > +	} else if (string && in && rep) {
> 
> Is there a reason to restrict optimization to rep ? That is, 
> it should be easy to extend to normal in?
> 
Normal "in" does not have a performance problem, to the best of my knowledge.
Going through emulator for non performance critical code means less logic
to duplicate.

> > +		kvm_x86_ops->skip_emulated_instruction(vcpu);
> > +		return EMULATE_DONE;
> > +	}
> > +	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
> > +		return EMULATE_FAIL;
> > +	if (ad_bytes_idx > 2)
> > +		return EMULATE_FAIL;
> > +
> > +	ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx];
> > +
> > +	rdi = kvm_address_mask(ad_bytes, rdi);
> > +
> > +	count = (PAGE_SIZE - offset_in_page(rdi))/size;
> > +
> > +	if (count == 0) /* 'in' crosses page boundary */
> > +		return EMULATE_FAIL;
> > +
> > +	count = min(count, kvm_address_mask(ad_bytes, rcx));
> > + 
> > +	r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu),
> > +			rdi, VCPU_SREG_ES, count, true, false, ad_bytes,
> > +			&linear_addr);
> 
> kvm_linearize_address expects size parameter in bytes?
Yes.

--
			Gleb.

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2012-07-01 11:24 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-12 12:01 [PATCHv2 0/5] improve speed of "rep ins" emulation Gleb Natapov
2012-06-12 12:01 ` [PATCHv2 1/5] Provide userspace IO exit completion callback Gleb Natapov
2012-06-29  0:51   ` Marcelo Tosatti
2012-07-01  8:15     ` Gleb Natapov
2012-06-12 12:01 ` [PATCHv2 2/5] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
2012-06-12 12:01 ` [PATCHv2 3/5] KVM: emulator: move some address manipulation function out of emulator code Gleb Natapov
2012-06-12 12:01 ` [PATCHv2 4/5] KVM: emulator: move linearize() " Gleb Natapov
2012-06-24 13:12   ` Avi Kivity
2012-06-24 13:27     ` Gleb Natapov
2012-06-24 13:39       ` Avi Kivity
2012-06-24 14:27         ` Gleb Natapov
2012-06-25 12:57           ` Avi Kivity
2012-06-25 13:12             ` Gleb Natapov
2012-06-25 13:40               ` Avi Kivity
2012-06-25 14:17                 ` Gleb Natapov
2012-06-25 14:32                   ` Avi Kivity
2012-06-25 14:55                     ` Gleb Natapov
2012-06-25 15:03                       ` Avi Kivity
2012-06-25 15:35                         ` Gleb Natapov
2012-06-25 15:50                           ` Avi Kivity
2012-06-26  8:30                             ` Gleb Natapov
2012-06-26  9:19                               ` Avi Kivity
2012-06-12 12:01 ` [PATCHv2 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
2012-06-29 22:26   ` Marcelo Tosatti
2012-07-01 11:24     ` Gleb Natapov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.