All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv4 0/5] improve speed of "rep ins" emulation
@ 2012-07-19  7:40 Gleb Natapov
  2012-07-19  7:40 ` [PATCHv4 1/5] Provide userspace IO exit completion callback Gleb Natapov
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19  7:40 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

With these patches, loading a 100M initrd takes ~10s instead of ~40s without them.

Changelog:
 v3->v4:
  - check rcx for zero after applying address mask
  - check for page boundary after linearizing address

 v2->v3
  - Fix incorrect size parameter for linearize() noticed by Marcelo.
  - Get rid of linearize() callback in emulation ops.

 v1->v2
   - add segment check and mask rcx/rdi correctly during increment.

Gleb Natapov (5):
  Provide userspace IO exit completion callback.
  KVM: emulator: make x86 emulation modes enum instead of defines
  KVM: emulator: move some address manipulation function out of
    emulator code.
  KVM: emulator: make linearize() callable from outside of emulator
  KVM: Provide fast path for "rep ins" emulation if possible.

 arch/x86/include/asm/kvm_emulate.h |   38 +++--
 arch/x86/include/asm/kvm_host.h    |   32 ++++
 arch/x86/kvm/emulate.c             |  133 ++++++++--------
 arch/x86/kvm/svm.c                 |   20 ++-
 arch/x86/kvm/vmx.c                 |   25 ++-
 arch/x86/kvm/x86.c                 |  309 ++++++++++++++++++++++++++++--------
 6 files changed, 399 insertions(+), 158 deletions(-)

-- 
1.7.10


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCHv4 1/5] Provide userspace IO exit completion callback.
  2012-07-19  7:40 [PATCHv4 0/5] improve speed of "rep ins" emulation Gleb Natapov
@ 2012-07-19  7:40 ` Gleb Natapov
  2012-07-19  7:40 ` [PATCHv4 2/5] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19  7:40 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

Current code assumes that an IO exit was due to instruction emulation
and hands execution back to the emulator directly. This patch adds a new
userspace IO exit completion callback that can be set by any other code
that caused an IO exit to userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/x86.c              |   92 +++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 24b7647..de2aff8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,6 +413,7 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
+	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8eacb2e..a4bc431 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4552,6 +4552,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	return true;
 }
 
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
+static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4622,13 +4625,16 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
-		else
+		else {
 			writeback = false;
+			vcpu->arch.complete_userspace_io = complete_emulated_pio;
+		}
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
 		if (!vcpu->mmio_is_write)
 			writeback = false;
 		r = EMULATE_DO_MMIO;
+		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
@@ -5484,6 +5490,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+	int r;
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!vcpu->arch.pio.count);
+
+	return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
  *
@@ -5500,47 +5524,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
  *      copy data
  *      exit
  */
-static int complete_mmio(struct kvm_vcpu *vcpu)
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_mmio_fragment *frag;
-	int r;
 
-	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
-		return 1;
+	BUG_ON(!vcpu->mmio_needed);
 
-	if (vcpu->mmio_needed) {
-		/* Complete previous fragment */
-		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-		if (!vcpu->mmio_is_write)
-			memcpy(frag->data, run->mmio.data, frag->len);
-		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
-			vcpu->mmio_needed = 0;
-			if (vcpu->mmio_is_write)
-				return 1;
-			vcpu->mmio_read_completed = 1;
-			goto done;
-		}
-		/* Initiate next fragment */
-		++frag;
-		run->exit_reason = KVM_EXIT_MMIO;
-		run->mmio.phys_addr = frag->gpa;
+	/* Complete previous fragment */
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+	if (!vcpu->mmio_is_write)
+		memcpy(frag->data, run->mmio.data, frag->len);
+	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
-			memcpy(run->mmio.data, frag->data, frag->len);
-		run->mmio.len = frag->len;
-		run->mmio.is_write = vcpu->mmio_is_write;
-		return 0;
-
-	}
-done:
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+			return 1;
+		vcpu->mmio_read_completed = 1;
+		return complete_emulated_io(vcpu);
+	}
+	/* Initiate next fragment */
+	++frag;
+	run->exit_reason = KVM_EXIT_MMIO;
+	run->mmio.phys_addr = frag->gpa;
+	if (vcpu->mmio_is_write)
+		memcpy(run->mmio.data, frag->data, frag->len);
+	run->mmio.len = frag->len;
+	run->mmio.is_write = vcpu->mmio_is_write;
+	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+	return 0;
 }
 
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5567,9 +5581,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	r = complete_mmio(vcpu);
-	if (r <= 0)
-		goto out;
+	if (unlikely(vcpu->arch.complete_userspace_io)) {
+		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+		vcpu->arch.complete_userspace_io = NULL;
+		r = cui(vcpu);
+		if (r <= 0)
+			goto out;
+	}
 
 	r = __vcpu_run(vcpu);
 
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCHv4 2/5] KVM: emulator: make x86 emulation modes enum instead of defines
  2012-07-19  7:40 [PATCHv4 0/5] improve speed of "rep ins" emulation Gleb Natapov
  2012-07-19  7:40 ` [PATCHv4 1/5] Provide userspace IO exit completion callback Gleb Natapov
@ 2012-07-19  7:40 ` Gleb Natapov
  2012-07-19  7:40 ` [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code Gleb Natapov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19  7:40 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |   22 ++++++++++------------
 arch/x86/kvm/emulate.c             |    4 +++-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 1ac46c22..7c276ca 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -236,6 +236,15 @@ struct read_cache {
 	unsigned long end;
 };
 
+/* Execution mode, passed to the emulator. */
+enum x86emul_mode {
+	X86EMUL_MODE_REAL,	/* Real mode.             */
+	X86EMUL_MODE_VM86,	/* Virtual 8086 mode.     */
+	X86EMUL_MODE_PROT16,	/* 16-bit protected mode. */
+	X86EMUL_MODE_PROT32,	/* 32-bit protected mode. */
+	X86EMUL_MODE_PROT64,	/* 64-bit (long) mode.    */
+};
+
 struct x86_emulate_ctxt {
 	struct x86_emulate_ops *ops;
 
@@ -243,7 +252,7 @@ struct x86_emulate_ctxt {
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
-	int mode;
+	enum x86emul_mode mode;
 
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
@@ -293,17 +302,6 @@ struct x86_emulate_ctxt {
 #define REPE_PREFIX	0xf3
 #define REPNE_PREFIX	0xf2
 
-/* Execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL     0	/* Real mode.             */
-#define X86EMUL_MODE_VM86     1	/* Virtual 8086 mode.     */
-#define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
-#define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
-#define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
-
-/* any protected mode   */
-#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
-			       X86EMUL_MODE_PROT64)
-
 /* CPUID vendors */
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f95d242..79899df 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2141,6 +2141,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 		if (msr_data == 0x0)
 			return emulate_gp(ctxt, 0);
 		break;
+	default:
+		break;
 	}
 
 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
@@ -4179,7 +4181,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	}
 
 	/* Instruction can only be executed in protected mode */
-	if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
+	if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code.
  2012-07-19  7:40 [PATCHv4 0/5] improve speed of "rep ins" emulation Gleb Natapov
  2012-07-19  7:40 ` [PATCHv4 1/5] Provide userspace IO exit completion callback Gleb Natapov
  2012-07-19  7:40 ` [PATCHv4 2/5] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
@ 2012-07-19  7:40 ` Gleb Natapov
  2012-07-19 10:42   ` Avi Kivity
  2012-07-19  7:40 ` [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator Gleb Natapov
  2012-07-19  7:40 ` [PATCHv4 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
  4 siblings, 1 reply; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19  7:40 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

The functions will be used outside of the emulator.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   25 +++++++++++++++++++++++++
 arch/x86/kvm/emulate.c          |   15 ++-------------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index de2aff8..6212575 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -980,4 +980,29 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
+static inline unsigned long kvm_ad_mask(u8 ad_bytes)
+{
+	return (1UL << (ad_bytes << 3)) - 1;
+}
+
+/* Access/update address held in a register, based on addressing mode. */
+static inline unsigned long
+kvm_address_mask(u8 ad_bytes, unsigned long reg)
+{
+	if (ad_bytes == sizeof(unsigned long))
+		return reg;
+	else
+		return reg & kvm_ad_mask(ad_bytes);
+}
+
+static inline void
+kvm_register_address_increment(u8 ad_bytes, unsigned long *reg, int inc)
+{
+	if (ad_bytes == sizeof(unsigned long))
+		*reg += inc;
+	else
+		*reg = (*reg & ~kvm_ad_mask(ad_bytes)) |
+			((*reg + inc) & kvm_ad_mask(ad_bytes));
+}
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 79899df..e317588 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -433,19 +433,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
 	return ctxt->ops->intercept(ctxt, &info, stage);
 }
 
-static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
-{
-	return (1UL << (ctxt->ad_bytes << 3)) - 1;
-}
-
 /* Access/update address held in a register, based on addressing mode. */
 static inline unsigned long
 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 {
-	if (ctxt->ad_bytes == sizeof(unsigned long))
-		return reg;
-	else
-		return reg & ad_mask(ctxt);
+	return kvm_address_mask(ctxt->ad_bytes, reg);
 }
 
 static inline unsigned long
@@ -457,10 +449,7 @@ register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 static inline void
 register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
 {
-	if (ctxt->ad_bytes == sizeof(unsigned long))
-		*reg += inc;
-	else
-		*reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
+	return kvm_register_address_increment(ctxt->ad_bytes, reg, inc);
 }
 
 static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator
  2012-07-19  7:40 [PATCHv4 0/5] improve speed of "rep ins" emulation Gleb Natapov
                   ` (2 preceding siblings ...)
  2012-07-19  7:40 ` [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code Gleb Natapov
@ 2012-07-19  7:40 ` Gleb Natapov
  2012-07-19 10:32   ` Avi Kivity
  2012-07-19  7:40 ` [PATCHv4 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
  4 siblings, 1 reply; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19  7:40 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

The function will be used outside of the emulator.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |   16 +++++
 arch/x86/kvm/emulate.c             |  114 ++++++++++++++++++++----------------
 2 files changed, 79 insertions(+), 51 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7c276ca..9ce651b 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -374,6 +374,21 @@ enum x86_intercept {
 	nr_x86_intercepts
 };
 
+struct x86_linearize_params
+{
+	enum x86emul_mode mode;
+	ulong ea;
+	unsigned size;
+	unsigned seg;
+	struct desc_struct desc;
+	u16 sel;
+	bool usable;
+	bool write;
+	bool fetch;
+	u8 ad_bytes;
+	unsigned cpl;
+};
+
 /* Host execution mode. */
 #if defined(CONFIG_X86_32)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
@@ -392,4 +407,5 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 			 u16 tss_selector, int idt_index, int reason,
 			 bool has_error_code, u32 error_code);
 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
+int x86_linearize(struct x86_linearize_params *p, ulong *linear);
 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e317588..79368d2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -470,14 +470,6 @@ static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
 	ctxt->seg_override = seg;
 }
 
-static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
-{
-	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
-		return 0;
-
-	return ctxt->ops->get_cached_segment_base(ctxt, seg);
-}
-
 static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
 {
 	if (!ctxt->has_seg_override)
@@ -505,11 +497,6 @@ static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
 	return emulate_exception(ctxt, GP_VECTOR, err, true);
 }
 
-static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
-{
-	return emulate_exception(ctxt, SS_VECTOR, err, true);
-}
-
 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
 {
 	return emulate_exception(ctxt, UD_VECTOR, 0, false);
@@ -573,79 +560,104 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
 		return true;
 }
 
-static int __linearize(struct x86_emulate_ctxt *ctxt,
-		     struct segmented_address addr,
-		     unsigned size, bool write, bool fetch,
-		     ulong *linear)
+int x86_linearize(struct x86_linearize_params *p, ulong *linear)
 {
-	struct desc_struct desc;
-	bool usable;
 	ulong la;
 	u32 lim;
-	u16 sel;
 	unsigned cpl, rpl;
 
-	la = seg_base(ctxt, addr.seg) + addr.ea;
-	switch (ctxt->mode) {
+	la = get_desc_base(&p->desc) + p->ea;
+	switch (p->mode) {
 	case X86EMUL_MODE_REAL:
 		break;
 	case X86EMUL_MODE_PROT64:
-		if (((signed long)la << 16) >> 16 != la)
-			return emulate_gp(ctxt, 0);
+		if (((signed long)la << 16) >> 16 != la) {
+			*linear = 0;
+			return GP_VECTOR;
+		}
 		break;
 	default:
-		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
-						addr.seg);
-		if (!usable)
+		if (!p->usable)
 			goto bad;
 		/* code segment or read-only data segment */
-		if (((desc.type & 8) || !(desc.type & 2)) && write)
+		if (((p->desc.type & 8) || !(p->desc.type & 2)) && p->write)
 			goto bad;
 		/* unreadable code segment */
-		if (!fetch && (desc.type & 8) && !(desc.type & 2))
+		if (!p->fetch && (p->desc.type & 8) && !(p->desc.type & 2))
 			goto bad;
-		lim = desc_limit_scaled(&desc);
-		if ((desc.type & 8) || !(desc.type & 4)) {
+		lim = desc_limit_scaled(&p->desc);
+		if ((p->desc.type & 8) || !(p->desc.type & 4)) {
 			/* expand-up segment */
-			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
+			if (p->ea > lim || (u32)(p->ea + p->size - 1) > lim)
 				goto bad;
 		} else {
 			/* exapand-down segment */
-			if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
+			if (p->ea <= lim || (u32)(p->ea + p->size - 1) <= lim)
 				goto bad;
-			lim = desc.d ? 0xffffffff : 0xffff;
-			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
+			lim = p->desc.d ? 0xffffffff : 0xffff;
+			if (p->ea > lim || (u32)(p->ea + p->size - 1) > lim)
 				goto bad;
 		}
-		cpl = ctxt->ops->cpl(ctxt);
-		rpl = sel & 3;
-		cpl = max(cpl, rpl);
-		if (!(desc.type & 8)) {
+		rpl = p->sel & 3;
+		cpl = max(p->cpl, rpl);
+		if (!(p->desc.type & 8)) {
 			/* data segment */
-			if (cpl > desc.dpl)
+			if (cpl > p->desc.dpl)
 				goto bad;
-		} else if ((desc.type & 8) && !(desc.type & 4)) {
+		} else if ((p->desc.type & 8) && !(p->desc.type & 4)) {
 			/* nonconforming code segment */
-			if (cpl != desc.dpl)
+			if (cpl != p->desc.dpl)
 				goto bad;
-		} else if ((desc.type & 8) && (desc.type & 4)) {
+		} else if ((p->desc.type & 8) && (p->desc.type & 4)) {
 			/* conforming code segment */
-			if (cpl < desc.dpl)
+			if (cpl < p->desc.dpl)
 				goto bad;
 		}
 		break;
 	}
-	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
+	if (p->fetch ? p->mode != X86EMUL_MODE_PROT64 : p->ad_bytes != 8)
 		la &= (u32)-1;
-	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
-		return emulate_gp(ctxt, 0);
 	*linear = la;
-	return X86EMUL_CONTINUE;
+	return -1;
 bad:
-	if (addr.seg == VCPU_SREG_SS)
-		return emulate_ss(ctxt, addr.seg);
+	*linear = (ulong)p->seg;
+	if (p->seg == VCPU_SREG_SS)
+		return SS_VECTOR;
 	else
-		return emulate_gp(ctxt, addr.seg);
+		return GP_VECTOR;
+}
+
+static int __linearize(struct x86_emulate_ctxt *ctxt,
+		     struct segmented_address addr,
+		     unsigned size, bool write, bool fetch,
+		     ulong *linear)
+{
+	int err;
+
+	struct x86_linearize_params param = {
+		.mode = ctxt->mode,
+		.ea = addr.ea,
+		.size = size,
+		.seg = addr.seg,
+		.write = write,
+		.fetch = fetch,
+		.ad_bytes = ctxt->ad_bytes,
+		.cpl = ctxt->ops->cpl(ctxt)
+	};
+
+	param.usable = ctxt->ops->get_segment(ctxt, &param.sel, &param.desc,
+			NULL, addr.seg);
+
+
+	err = x86_linearize(&param, linear);
+
+	if (err >= 0)
+		return emulate_exception(ctxt, err, (int)*linear, true);
+
+	if (insn_aligned(ctxt, size) && ((*linear & (size - 1)) != 0))
+		return emulate_gp(ctxt, 0);
+
+	return X86EMUL_CONTINUE;
 }
 
 static int linearize(struct x86_emulate_ctxt *ctxt,
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCHv4 5/5] KVM: Provide fast path for "rep ins" emulation if possible.
  2012-07-19  7:40 [PATCHv4 0/5] improve speed of "rep ins" emulation Gleb Natapov
                   ` (3 preceding siblings ...)
  2012-07-19  7:40 ` [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator Gleb Natapov
@ 2012-07-19  7:40 ` Gleb Natapov
  2012-07-19 10:37   ` Avi Kivity
  4 siblings, 1 reply; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19  7:40 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

"rep ins" emulation is going through emulator now. This is slow because
emulator knows how to write back only one datum at a time. This patch
provides fast path for the instruction in certain conditions. The
conditions are: DF flag is not set, destination memory is RAM and single
datum does not cross page boundary. If fast path code fails it falls
back to emulation.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    6 ++
 arch/x86/kvm/svm.c              |   20 +++-
 arch/x86/kvm/vmx.c              |   25 +++--
 arch/x86/kvm/x86.c              |  217 +++++++++++++++++++++++++++++++++------
 4 files changed, 224 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6212575..ecf8430 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -411,6 +411,10 @@ struct kvm_vcpu_arch {
 	/* emulate context */
 
 	struct x86_emulate_ctxt emulate_ctxt;
+	struct x86_fast_string_pio_ctxt {
+		unsigned long linear_addr;
+		u8 ad_bytes;
+	} fast_string_pio_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
 	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -776,6 +780,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 struct x86_emulate_ctxt;
 
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port,
+		u8 ad_bytes_idx);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7a41878..f3e7bb3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-	int size, in, string;
+	int size, in, string, rep;
 	unsigned port;
 
 	++svm->vcpu.stat.io_exits;
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
+	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-	if (string || in)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 	svm->next_rip = svm->vmcb->control.exit_info_2;
-	skip_emulated_instruction(&svm->vcpu);
 
-	return kvm_fast_pio_out(vcpu, size, port);
+	if (!string && !in) {
+		skip_emulated_instruction(&svm->vcpu);
+		return kvm_fast_pio_out(vcpu, size, port);
+	} else if (string && in && rep) {
+		int addr_size = (io_info & SVM_IOIO_ASIZE_MASK) >>
+			SVM_IOIO_ASIZE_SHIFT;
+		int r = kvm_fast_string_pio_in(vcpu, size, port,
+				ffs(addr_size) - 1);
+		if (r != EMULATE_FAIL)
+			return r == EMULATE_DONE;
+	}
+
+	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e10ec0e..bae2c11 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -639,6 +639,7 @@ static unsigned long *vmx_msr_bitmap_longmode;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
+static bool cpu_has_ins_outs_inst_info;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2522,6 +2523,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (((vmx_msr_high >> 18) & 15) != 6)
 		return -EIO;
 
+	cpu_has_ins_outs_inst_info = vmx_msr_high & (1u << 22);
+
 	vmcs_conf->size = vmx_msr_high & 0x1fff;
 	vmcs_conf->order = get_order(vmcs_config.size);
 	vmcs_conf->revision_id = vmx_msr_low;
@@ -4393,23 +4396,31 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
 static int handle_io(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int size, in, string;
+	int size, in, string, rep;
 	unsigned port;
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	string = (exit_qualification & 16) != 0;
 	in = (exit_qualification & 8) != 0;
+	string = (exit_qualification & 16) != 0;
+	rep = (exit_qualification & 32) != 0;
 
 	++vcpu->stat.io_exits;
 
-	if (string || in)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
-
 	port = exit_qualification >> 16;
 	size = (exit_qualification & 7) + 1;
-	skip_emulated_instruction(vcpu);
 
-	return kvm_fast_pio_out(vcpu, size, port);
+	if (!string && !in) {
+		skip_emulated_instruction(vcpu);
+		return kvm_fast_pio_out(vcpu, size, port);
+	} else if (string && in && rep && cpu_has_ins_outs_inst_info) {
+		u32 inst_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+		int r = kvm_fast_string_pio_in(vcpu, size, port,
+				(inst_info >> 7) & 7);
+		if (r != EMULATE_FAIL)
+			return r == EMULATE_DONE;
+	}
+
+	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static void
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a4bc431..efdaa21 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3661,6 +3661,59 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
+static bool get_segment_descriptor(struct kvm_vcpu *vcpu, u16 *selector,
+				 struct desc_struct *desc, u32 *base3,
+				 int seg)
+{
+	struct kvm_segment var;
+
+	kvm_get_segment(vcpu, &var, seg);
+	*selector = var.selector;
+
+	if (var.unusable)
+		return false;
+
+	if (var.g)
+		var.limit >>= 12;
+	set_desc_limit(desc, var.limit);
+	set_desc_base(desc, (unsigned long)var.base);
+#ifdef CONFIG_X86_64
+	if (base3)
+		*base3 = var.base >> 32;
+#endif
+	desc->type = var.type;
+	desc->s = var.s;
+	desc->dpl = var.dpl;
+	desc->p = var.present;
+	desc->avl = var.avl;
+	desc->l = var.l;
+	desc->d = var.db;
+	desc->g = var.g;
+
+	return true;
+}
+
+static int kvm_linearize_address(struct kvm_vcpu *vcpu, enum x86emul_mode mode,
+		ulong ea, unsigned seg, unsigned size, bool write, bool fetch,
+		u8 ad_bytes, ulong *linear)
+{
+	struct x86_linearize_params param = {
+		.mode = mode,
+		.ea = ea,
+		.size = size,
+		.seg = seg,
+		.write = write,
+		.fetch = fetch,
+		.ad_bytes = ad_bytes,
+		.cpl = kvm_x86_ops->get_cpl(vcpu)
+	};
+
+	param.usable = get_segment_descriptor(vcpu, &param.sel, &param.desc,
+			NULL, seg);
+
+	return x86_linearize(&param, linear);
+}
+
 static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 				gpa_t *gpa, struct x86_exception *exception,
 				bool write)
@@ -4197,32 +4250,9 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
 				 struct desc_struct *desc, u32 *base3,
 				 int seg)
 {
-	struct kvm_segment var;
-
-	kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
-	*selector = var.selector;
-
-	if (var.unusable)
-		return false;
-
-	if (var.g)
-		var.limit >>= 12;
-	set_desc_limit(desc, var.limit);
-	set_desc_base(desc, (unsigned long)var.base);
-#ifdef CONFIG_X86_64
-	if (base3)
-		*base3 = var.base >> 32;
-#endif
-	desc->type = var.type;
-	desc->s = var.s;
-	desc->dpl = var.dpl;
-	desc->p = var.present;
-	desc->avl = var.avl;
-	desc->l = var.l;
-	desc->d = var.db;
-	desc->g = var.g;
 
-	return true;
+	return get_segment_descriptor(emul_to_vcpu(ctxt), selector, desc, base3,
+			seg);
 }
 
 static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
@@ -4408,10 +4438,22 @@ static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
 	ctxt->mem_read.end = 0;
 }
 
+static enum x86emul_mode get_emulation_mode(struct kvm_vcpu *vcpu)
+{
+	int cs_db, cs_l;
+
+	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+
+	return (!is_protmode(vcpu))			? X86EMUL_MODE_REAL :
+		(kvm_get_rflags(vcpu) & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
+		cs_l					? X86EMUL_MODE_PROT64 :
+		cs_db					? X86EMUL_MODE_PROT32 :
+							  X86EMUL_MODE_PROT16;
+}
+
 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
-	int cs_db, cs_l;
 
 	/*
 	 * TODO: fix emulate.c to use guest_read/write_register
@@ -4421,15 +4463,10 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	 */
 	cache_all_regs(vcpu);
 
-	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
 	ctxt->eip = kvm_rip_read(vcpu);
-	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
-		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
-		     cs_l				? X86EMUL_MODE_PROT64 :
-		     cs_db				? X86EMUL_MODE_PROT32 :
-							  X86EMUL_MODE_PROT16;
+	ctxt->mode = get_emulation_mode(vcpu);
 	ctxt->guest_mode = is_guest_mode(vcpu);
 
 	init_decode_cache(ctxt, vcpu->arch.regs);
@@ -4665,6 +4702,122 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
+static int __kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+		unsigned short port, unsigned long addr,
+		int count)
+{
+	struct page *page;
+	gpa_t gpa;
+	char *kaddr;
+	int ret;
+
+	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
+
+	if (gpa == UNMAPPED_GVA ||
+			(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+		return EMULATE_FAIL;
+
+	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	if (is_error_page(page)) {
+		kvm_release_page_clean(page);
+		return EMULATE_FAIL;
+	}
+
+	kaddr = kmap_atomic(page);
+	kaddr += offset_in_page(gpa);
+
+	ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
+			kaddr, count);
+
+	kunmap_atomic(kaddr);
+	if (ret) {
+		u8 ad_bytes = vcpu->arch.fast_string_pio_ctxt.ad_bytes;
+		unsigned long reg;
+
+		reg = kvm_register_read(vcpu, VCPU_REGS_RCX);
+		kvm_register_address_increment(ad_bytes, &reg, -count);
+		kvm_register_write(vcpu, VCPU_REGS_RCX, reg);
+
+		reg = kvm_register_read(vcpu, VCPU_REGS_RDI);
+		kvm_register_address_increment(ad_bytes, &reg, count * size);
+		kvm_register_write(vcpu, VCPU_REGS_RDI, reg);
+
+		kvm_release_page_dirty(page);
+		return EMULATE_DONE;
+	}
+	kvm_release_page_clean(page);
+	return EMULATE_DO_MMIO;
+}
+
+static int complete_fast_string_pio(struct kvm_vcpu *vcpu)
+{
+	unsigned long linear_addr = vcpu->arch.fast_string_pio_ctxt.linear_addr;
+	int r;
+
+	BUG_ON(!vcpu->arch.pio.count);
+
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = __kvm_fast_string_pio_in(vcpu, vcpu->arch.pio.size,
+			vcpu->arch.pio.port, linear_addr, vcpu->arch.pio.count);
+	BUG_ON(r == EMULATE_DO_MMIO);
+	if (r == EMULATE_FAIL) /* mem slot gone while we were not looking */
+		vcpu->arch.pio.count = 0; /* drop the pio data */
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	return 1;
+}
+
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+		unsigned short port, u8 ad_bytes_idx)
+{
+	unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
+	unsigned long linear_addr;
+	unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count;
+	u8 ad_bytes;
+	int r;
+
+	if (ad_bytes_idx > 2)
+		return EMULATE_FAIL;
+
+	ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx];
+
+	rcx = kvm_address_mask(ad_bytes, rcx);
+
+	if (rcx == 0) {
+		kvm_x86_ops->skip_emulated_instruction(vcpu);
+		return EMULATE_DONE;
+	}
+
+	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
+		return EMULATE_FAIL;
+
+	rdi = kvm_address_mask(ad_bytes, rdi);
+
+	r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu),
+			rdi, VCPU_SREG_ES, rcx * size, true, false, ad_bytes,
+			&linear_addr);
+
+	if (r >= 0)
+		return EMULATE_FAIL;
+
+	count = (PAGE_SIZE - offset_in_page(linear_addr))/size;
+
+	if (count == 0) /* 'in' crosses page boundry */
+		return EMULATE_FAIL;
+
+	count = min(count, rcx);
+
+	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
+
+	if (r != EMULATE_DO_MMIO)
+		return r;
+
+	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
+	vcpu->arch.fast_string_pio_ctxt.ad_bytes = ad_bytes;
+	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
+	return EMULATE_DO_MMIO;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
+
 static void tsc_bad(void *info)
 {
 	__this_cpu_write(cpu_tsc_khz, 0);
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator
  2012-07-19  7:40 ` [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator Gleb Natapov
@ 2012-07-19 10:32   ` Avi Kivity
  2012-07-19 10:51     ` Gleb Natapov
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2012-07-19 10:32 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 07/19/2012 10:40 AM, Gleb Natapov wrote:
> The function will be used outside of the emulator.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_emulate.h |   16 +++++
>  arch/x86/kvm/emulate.c             |  114 ++++++++++++++++++++----------------
>  2 files changed, 79 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index 7c276ca..9ce651b 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -374,6 +374,21 @@ enum x86_intercept {
>  	nr_x86_intercepts
>  };
>  
> +struct x86_linearize_params
> +{
> +	enum x86emul_mode mode;
> +	ulong ea;
> +	unsigned size;
> +	unsigned seg;
> +	struct desc_struct desc;
> +	u16 sel;
> +	bool usable;
> +	bool write;
> +	bool fetch;
> +	u8 ad_bytes;
> +	unsigned cpl;
> +};
> +
> @@ -470,14 +470,6 @@ static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
>  	ctxt->seg_override = seg;
>  }
>  
> -static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
> -{
> -	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
> -		return 0;
> -
> -	return ctxt->ops->get_cached_segment_base(ctxt, seg);
> -}
> -

> +int x86_linearize(struct x86_linearize_params *p, ulong *linear)
>  {
> -	struct desc_struct desc;
> -	bool usable;
>  	ulong la;
>  	u32 lim;
> -	u16 sel;
>  	unsigned cpl, rpl;
>  
> -	la = seg_base(ctxt, addr.seg) + addr.ea;
> -	switch (ctxt->mode) {
> +	la = get_desc_base(&p->desc) + p->ea;

This makes 64-bit mode slower, since before the patch it avoided reading
the segment base for non-fs/gs segments, and only read the segment base
for fs/gs.  After the patch we always execute 4 VMREADs (and decode the
results).


-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 5/5] KVM: Provide fast path for "rep ins" emulation if possible.
  2012-07-19  7:40 ` [PATCHv4 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
@ 2012-07-19 10:37   ` Avi Kivity
  2012-07-19 11:09     ` Gleb Natapov
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2012-07-19 10:37 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 07/19/2012 10:40 AM, Gleb Natapov wrote:
> "rep ins" emulation is going through emulator now. This is slow because
> emulator knows how to write back only one datum at a time. This patch
> provides fast path for the instruction in certain conditions. The
> conditions are: DF flag is not set, destination memory is RAM and single
> datum does not cross page boundary. If fast path code fails it falls
> back to emulation.
> 
> +
> +int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
> +		unsigned short port, u8 ad_bytes_idx)
> +{
> +	unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
> +	unsigned long linear_addr;
> +	unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count;
> +	u8 ad_bytes;
> +	int r;
> +
> +	if (ad_bytes_idx > 2)
> +		return EMULATE_FAIL;
> +
> +	ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx];
> +

2 << logs2_ad_bytes instead of this nice functional programming style.

> +	rcx = kvm_address_mask(ad_bytes, rcx);
> +
> +	if (rcx == 0) {
> +		kvm_x86_ops->skip_emulated_instruction(vcpu);
> +		return EMULATE_DONE;
> +	}
> +
> +	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
> +		return EMULATE_FAIL;
> +
> +	rdi = kvm_address_mask(ad_bytes, rdi);
> +
> +	r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu),
> +			rdi, VCPU_SREG_ES, rcx * size, true, false, ad_bytes,
> +			&linear_addr);
> +
> +	if (r >= 0)
> +		return EMULATE_FAIL;
> +
> +	count = (PAGE_SIZE - offset_in_page(linear_addr))/size;
> +
> +	if (count == 0) /* 'in' crosses page boundary */
> +		return EMULATE_FAIL;
> +
> +	count = min(count, rcx);
> +
> +	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
> +
> +	if (r != EMULATE_DO_MMIO)
> +		return r;
> +
> +	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
> +	vcpu->arch.fast_string_pio_ctxt.ad_bytes = ad_bytes;
> +	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
> +	return EMULATE_DO_MMIO;
> +}
> +EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
> +

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code.
  2012-07-19  7:40 ` [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code Gleb Natapov
@ 2012-07-19 10:42   ` Avi Kivity
  2012-07-19 10:49     ` Gleb Natapov
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2012-07-19 10:42 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 07/19/2012 10:40 AM, Gleb Natapov wrote:
> The functions will be used outside of the emulator.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |   25 +++++++++++++++++++++++++
>  arch/x86/kvm/emulate.c          |   15 ++-------------
>  2 files changed, 27 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index de2aff8..6212575 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -980,4 +980,29 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
>  void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
>  void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
>  
> +static inline unsigned long kvm_ad_mask(u8 ad_bytes)
> +{
> +	return (1UL << (ad_bytes << 3)) - 1;
> +}
> +
> +/* Access/update address held in a register, based on addressing mode. */
> +static inline unsigned long
> +kvm_address_mask(u8 ad_bytes, unsigned long reg)
> +{
> +	if (ad_bytes == sizeof(unsigned long))
> +		return reg;
> +	else
> +		return reg & kvm_ad_mask(ad_bytes);
> +}
> +
> +static inline void
> +kvm_register_address_increment(u8 ad_bytes, unsigned long *reg, int inc)
> +{
> +	if (ad_bytes == sizeof(unsigned long))
> +		*reg += inc;
> +	else
> +		*reg = (*reg & ~kvm_ad_mask(ad_bytes)) |
> +			((*reg + inc) & kvm_ad_mask(ad_bytes));
> +}
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 79899df..e317588 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -433,19 +433,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
>  	return ctxt->ops->intercept(ctxt, &info, stage);
>  }
>  
> -static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
> -{
> -	return (1UL << (ctxt->ad_bytes << 3)) - 1;
> -}
> -
>  /* Access/update address held in a register, based on addressing mode. */
>  static inline unsigned long
>  address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
>  {
> -	if (ctxt->ad_bytes == sizeof(unsigned long))
> -		return reg;
> -	else
> -		return reg & ad_mask(ctxt);
> +	return kvm_address_mask(ctxt->ad_bytes, reg);
>  }
>  
>  static inline unsigned long
> @@ -457,10 +449,7 @@ register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
>  static inline void
>  register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
>  {
> -	if (ctxt->ad_bytes == sizeof(unsigned long))
> -		*reg += inc;
> -	else
> -		*reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
> +	return kvm_register_address_increment(ctxt->ad_bytes, reg, inc);
>  }

All those exports suggest it's better to move the fast path into the
emulator.  Suppose we change register access to use callbacks instead of
bulk load/save, could we reuse the exising code?


-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code.
  2012-07-19 10:42   ` Avi Kivity
@ 2012-07-19 10:49     ` Gleb Natapov
  2012-07-19 13:34       ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19 10:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Thu, Jul 19, 2012 at 01:42:31PM +0300, Avi Kivity wrote:
> On 07/19/2012 10:40 AM, Gleb Natapov wrote:
> > The functions will be used outside of the emulator.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |   25 +++++++++++++++++++++++++
> >  arch/x86/kvm/emulate.c          |   15 ++-------------
> >  2 files changed, 27 insertions(+), 13 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index de2aff8..6212575 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -980,4 +980,29 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
> >  void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
> >  void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
> >  
> > +static inline unsigned long kvm_ad_mask(u8 ad_bytes)
> > +{
> > +	return (1UL << (ad_bytes << 3)) - 1;
> > +}
> > +
> > +/* Access/update address held in a register, based on addressing mode. */
> > +static inline unsigned long
> > +kvm_address_mask(u8 ad_bytes, unsigned long reg)
> > +{
> > +	if (ad_bytes == sizeof(unsigned long))
> > +		return reg;
> > +	else
> > +		return reg & kvm_ad_mask(ad_bytes);
> > +}
> > +
> > +static inline void
> > +kvm_register_address_increment(u8 ad_bytes, unsigned long *reg, int inc)
> > +{
> > +	if (ad_bytes == sizeof(unsigned long))
> > +		*reg += inc;
> > +	else
> > +		*reg = (*reg & ~kvm_ad_mask(ad_bytes)) |
> > +			((*reg + inc) & kvm_ad_mask(ad_bytes));
> > +}
> > +
> >  #endif /* _ASM_X86_KVM_HOST_H */
> > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> > index 79899df..e317588 100644
> > --- a/arch/x86/kvm/emulate.c
> > +++ b/arch/x86/kvm/emulate.c
> > @@ -433,19 +433,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
> >  	return ctxt->ops->intercept(ctxt, &info, stage);
> >  }
> >  
> > -static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
> > -{
> > -	return (1UL << (ctxt->ad_bytes << 3)) - 1;
> > -}
> > -
> >  /* Access/update address held in a register, based on addressing mode. */
> >  static inline unsigned long
> >  address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
> >  {
> > -	if (ctxt->ad_bytes == sizeof(unsigned long))
> > -		return reg;
> > -	else
> > -		return reg & ad_mask(ctxt);
> > +	return kvm_address_mask(ctxt->ad_bytes, reg);
> >  }
> >  
> >  static inline unsigned long
> > @@ -457,10 +449,7 @@ register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
> >  static inline void
> >  register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
> >  {
> > -	if (ctxt->ad_bytes == sizeof(unsigned long))
> > -		*reg += inc;
> > -	else
> > -		*reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
> > +	return kvm_register_address_increment(ctxt->ad_bytes, reg, inc);
> >  }
> 
> All those exports suggest it's better to move the fast path into the
> emulator. 

We've already been through that. Putting the code into the emulator gives
us nothing unless it also works on emulator context and working on
partially initialized emulator context is first dangerous and second
slower.

>  Suppose we change register access to use callbacks instead of
> bulk load/save, could we reuse the exising code?
> 
I do not see that problem. This helper function does basic arithmetic
on three values. The value itself will be fetched on demand by the emulator.

--
			Gleb.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator
  2012-07-19 10:32   ` Avi Kivity
@ 2012-07-19 10:51     ` Gleb Natapov
  2012-07-19 12:52       ` Avi Kivity
  0 siblings, 1 reply; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19 10:51 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Thu, Jul 19, 2012 at 01:32:59PM +0300, Avi Kivity wrote:
> On 07/19/2012 10:40 AM, Gleb Natapov wrote:
> > The function will be used outside of the emulator.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_emulate.h |   16 +++++
> >  arch/x86/kvm/emulate.c             |  114 ++++++++++++++++++++----------------
> >  2 files changed, 79 insertions(+), 51 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> > index 7c276ca..9ce651b 100644
> > --- a/arch/x86/include/asm/kvm_emulate.h
> > +++ b/arch/x86/include/asm/kvm_emulate.h
> > @@ -374,6 +374,21 @@ enum x86_intercept {
> >  	nr_x86_intercepts
> >  };
> >  
> > +struct x86_linearize_params
> > +{
> > +	enum x86emul_mode mode;
> > +	ulong ea;
> > +	unsigned size;
> > +	unsigned seg;
> > +	struct desc_struct desc;
> > +	u16 sel;
> > +	bool usable;
> > +	bool write;
> > +	bool fetch;
> > +	u8 ad_bytes;
> > +	unsigned cpl;
> > +};
> > +
> > @@ -470,14 +470,6 @@ static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
> >  	ctxt->seg_override = seg;
> >  }
> >  
> > -static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
> > -{
> > -	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
> > -		return 0;
> > -
> > -	return ctxt->ops->get_cached_segment_base(ctxt, seg);
> > -}
> > -
> 
> > +int x86_linearize(struct x86_linearize_params *p, ulong *linear)
> >  {
> > -	struct desc_struct desc;
> > -	bool usable;
> >  	ulong la;
> >  	u32 lim;
> > -	u16 sel;
> >  	unsigned cpl, rpl;
> >  
> > -	la = seg_base(ctxt, addr.seg) + addr.ea;
> > -	switch (ctxt->mode) {
> > +	la = get_desc_base(&p->desc) + p->ea;
> 
> This makes 64-bit mode slower, since before the patch it avoided reading
> the segment base for non-fs/gs segments, and only read the segment base
> for fs/gs.  After the patch we always execute 4 VMREADs (and decode the
> results).
> 
That's easy to fix by making the caller prepare a fake desc if mode is 64-bit
and segment is non-fs/gs. The question is if this is even measurable?

--
			Gleb.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 5/5] KVM: Provide fast path for "rep ins" emulation if possible.
  2012-07-19 10:37   ` Avi Kivity
@ 2012-07-19 11:09     ` Gleb Natapov
  0 siblings, 0 replies; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19 11:09 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Thu, Jul 19, 2012 at 01:37:32PM +0300, Avi Kivity wrote:
> On 07/19/2012 10:40 AM, Gleb Natapov wrote:
> > "rep ins" emulation is going through emulator now. This is slow because
> > emulator knows how to write back only one datum at a time. This patch
> > provides fast path for the instruction in certain conditions. The
> > conditions are: DF flag is not set, destination memory is RAM and single
> > datum does not cross page boundary. If fast path code fails it falls
> > back to emulation.
> > 
> > +
> > +int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
> > +		unsigned short port, u8 ad_bytes_idx)
> > +{
> > +	unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
> > +	unsigned long linear_addr;
> > +	unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count;
> > +	u8 ad_bytes;
> > +	int r;
> > +
> > +	if (ad_bytes_idx > 2)
> > +		return EMULATE_FAIL;
> > +
> > +	ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx];
> > +
> 
> 2 << logs2_ad_bytes instead of this nice functional programming style.
> 
Heh, my way looks cooler :)

> > +	rcx = kvm_address_mask(ad_bytes, rcx);
> > +
> > +	if (rcx == 0) {
> > +		kvm_x86_ops->skip_emulated_instruction(vcpu);
> > +		return EMULATE_DONE;
> > +	}
> > +
> > +	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
> > +		return EMULATE_FAIL;
> > +
> > +	rdi = kvm_address_mask(ad_bytes, rdi);
> > +
> > +	r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu),
> > +			rdi, VCPU_SREG_ES, rcx * size, true, false, ad_bytes,
> > +			&linear_addr);
> > +
> > +	if (r >= 0)
> > +		return EMULATE_FAIL;
> > +
> > +	count = (PAGE_SIZE - offset_in_page(linear_addr))/size;
> > +
> > +	if (count == 0) /* 'in' crosses page boundary */
> > +		return EMULATE_FAIL;
> > +
> > +	count = min(count, rcx);
> > +
> > +	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
> > +
> > +	if (r != EMULATE_DO_MMIO)
> > +		return r;
> > +
> > +	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
> > +	vcpu->arch.fast_string_pio_ctxt.ad_bytes = ad_bytes;
> > +	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
> > +	return EMULATE_DO_MMIO;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
> > +
> 
> -- 
> error compiling committee.c: too many arguments to function
> 

--
			Gleb.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator
  2012-07-19 10:51     ` Gleb Natapov
@ 2012-07-19 12:52       ` Avi Kivity
  2012-07-19 12:54         ` Gleb Natapov
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2012-07-19 12:52 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 07/19/2012 01:51 PM, Gleb Natapov wrote:

>> > +int x86_linearize(struct x86_linearize_params *p, ulong *linear)
>> >  {
>> > -	struct desc_struct desc;
>> > -	bool usable;
>> >  	ulong la;
>> >  	u32 lim;
>> > -	u16 sel;
>> >  	unsigned cpl, rpl;
>> >  
>> > -	la = seg_base(ctxt, addr.seg) + addr.ea;
>> > -	switch (ctxt->mode) {
>> > +	la = get_desc_base(&p->desc) + p->ea;
>> 
>> This makes 64-bit mode slower, since before the patch it avoided reading
>> the segment base for non-fs/gs segments, and only read the segment base
>> for fs/gs.  After the patch we always execute 4 VMREADs (and decode the
>> results).
>> 
> That's easy to fix by making caller prepare fake desc if mode is 64-bit
> and segment is non-fs/gs. The question is if this even measurable?

I'm sure it will be measurable, esp. on older processors.  Why not
measure it?


-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator
  2012-07-19 12:52       ` Avi Kivity
@ 2012-07-19 12:54         ` Gleb Natapov
  0 siblings, 0 replies; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19 12:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Thu, Jul 19, 2012 at 03:52:15PM +0300, Avi Kivity wrote:
> On 07/19/2012 01:51 PM, Gleb Natapov wrote:
> 
> >> > +int x86_linearize(struct x86_linearize_params *p, ulong *linear)
> >> >  {
> >> > -	struct desc_struct desc;
> >> > -	bool usable;
> >> >  	ulong la;
> >> >  	u32 lim;
> >> > -	u16 sel;
> >> >  	unsigned cpl, rpl;
> >> >  
> >> > -	la = seg_base(ctxt, addr.seg) + addr.ea;
> >> > -	switch (ctxt->mode) {
> >> > +	la = get_desc_base(&p->desc) + p->ea;
> >> 
> >> This makes 64-bit mode slower, since before the patch it avoided reading
> >> the segment base for non-fs/gs segments, and only read the segment base
> >> for fs/gs.  After the patch we always execute 4 VMREADs (and decode the
> >> results).
> >> 
> > That's easy to fix by making caller prepare fake desc if mode is 64-bit
> > and segment is non-fs/gs. The question is if this even measurable?
> 
> I'm sure it will be measurable, esp. on older processors.  Why not
> measure it?
> 
> 
It is easier to just fix it :) Will do and resend if you agree with the
general approach.

--
			Gleb.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code.
  2012-07-19 10:49     ` Gleb Natapov
@ 2012-07-19 13:34       ` Avi Kivity
  2012-07-19 13:43         ` Gleb Natapov
  0 siblings, 1 reply; 16+ messages in thread
From: Avi Kivity @ 2012-07-19 13:34 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 07/19/2012 01:49 PM, Gleb Natapov wrote:
>> All those exports suggest it's better to move the fast path into the
>> emulator. 
> 
> We've already been through that. Putting the code into the emulator gives
> us nothing unless it also works on emulator context and working on
> partially initialized emulator context is first dangerous and second
> slower.

We can make it work on an x86_pio_ctxt.  What it gives us is common
code. (x86_emulate_ctxt can include a x86_pio_ctxt when it does pio).

> 
>>  Suppose we change register access to use callbacks instead of
>> bulk load/save, could we reuse the exising code?
>> 
> I do not see that problem. This helper function do basic arithmetics
> on three values. The value itself will be fetched on demand by the emulator.

I meant to reduce the emulator initialization cost, so it isn't slow.

btw, I'm guessing that the main speedup comes not from avoiding the
decode, but by doing page-at-a-time instead of word-at-a-time.

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code.
  2012-07-19 13:34       ` Avi Kivity
@ 2012-07-19 13:43         ` Gleb Natapov
  0 siblings, 0 replies; 16+ messages in thread
From: Gleb Natapov @ 2012-07-19 13:43 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Thu, Jul 19, 2012 at 04:34:50PM +0300, Avi Kivity wrote:
> On 07/19/2012 01:49 PM, Gleb Natapov wrote:
> >> All those exports suggest it's better to move the fast path into the
> >> emulator. 
> > 
> > We've already been through that. Putting the code into the emulator gives
> > us nothing unless it also works on emulator context and working on
> > partially initialized emulator context is first dangerous and second
> > slower.
> 
> We can make it work on an x86_pio_ctxt.  What it gives us is common
> code. (x86_emulate_ctxt can include a x86_pio_ctxt when it does pio).
> 
My patches do a similar thing, but instead of x86_pio_ctxt they use
x86_linearize_ctx. The code is common.

> > 
> >>  Suppose we change register access to use callbacks instead of
> >> bulk load/save, could we reuse the exising code?
> >> 
> > I do not see that problem. This helper function do basic arithmetics
> > on three values. The value itself will be fetched on demand by the emulator.
> 
> I meant to reduce the emulator initialization cost, so it isn't slow.
> 
> btw, I'm guessing that the main speedup comes not from avoiding the
> decode, but by doing page-at-a-time instead of word-at-a-time.
> 
And hacking the emulator to do a page at a time is not trivial and complicates
it for no good reason. BTW you were the one who suggested implementing
fast pio as a separate code path.

--
			Gleb.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2012-07-19 13:43 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-19  7:40 [PATCHv4 0/5] improve speed of "rep ins" emulation Gleb Natapov
2012-07-19  7:40 ` [PATCHv4 1/5] Provide userspace IO exit completion callback Gleb Natapov
2012-07-19  7:40 ` [PATCHv4 2/5] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
2012-07-19  7:40 ` [PATCHv4 3/5] KVM: emulator: move some address manipulation function out of emulator code Gleb Natapov
2012-07-19 10:42   ` Avi Kivity
2012-07-19 10:49     ` Gleb Natapov
2012-07-19 13:34       ` Avi Kivity
2012-07-19 13:43         ` Gleb Natapov
2012-07-19  7:40 ` [PATCHv4 4/5] KVM: emulator: make linearize() callable from outside of emulator Gleb Natapov
2012-07-19 10:32   ` Avi Kivity
2012-07-19 10:51     ` Gleb Natapov
2012-07-19 12:52       ` Avi Kivity
2012-07-19 12:54         ` Gleb Natapov
2012-07-19  7:40 ` [PATCHv4 5/5] KVM: Provide fast path for "rep ins" emulation if possible Gleb Natapov
2012-07-19 10:37   ` Avi Kivity
2012-07-19 11:09     ` Gleb Natapov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.